3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.7-xen-sparse/arch/xen/kernel/xen_proc.c
4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.7-xen-sparse/drivers/char/mem.c
40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.7-xen-sparse/drivers/xen/Makefile
-40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.7-xen-sparse/drivers/xen/block/Kconfig
-40f562395atl9x4suKGhPkjqLOXESg linux-2.6.7-xen-sparse/drivers/xen/block/Makefile
-40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.7-xen-sparse/drivers/xen/block/block.c
-40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.7-xen-sparse/drivers/xen/block/block.h
-40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.7-xen-sparse/drivers/xen/block/vbd.c
+410d0893otFGghmv4dUXDUBBdY5aIA linux-2.6.7-xen-sparse/drivers/xen/blkback/Makefile
+4087cf0d1XgMkooTZAiJS6NrcpLQNQ linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c
+4087cf0dZadZ8r6CEt4fNN350Yle3A linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h
+4087cf0dxlh29iw0w-9rxOCEGCjPcw linux-2.6.7-xen-sparse/drivers/xen/blkback/control.c
+4087cf0dbuoH20fMjNZjcgrRK-1msQ linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c
+4087cf0dk97tacDzxfByWV7JifUYqA linux-2.6.7-xen-sparse/drivers/xen/blkback/vbd.c
+40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.7-xen-sparse/drivers/xen/blkfront/Kconfig
+40f562395atl9x4suKGhPkjqLOXESg linux-2.6.7-xen-sparse/drivers/xen/blkfront/Makefile
+40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c
+40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.7-xen-sparse/drivers/xen/blkfront/block.h
+40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.7-xen-sparse/drivers/xen/blkfront/vbd.c
40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.7-xen-sparse/drivers/xen/console/Makefile
3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.7-xen-sparse/drivers/xen/console/console.c
40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.7-xen-sparse/drivers/xen/evtchn/Makefile
endmenu
+# Xen's block-device backend driver needs order-11 (2^11-page)
+# contiguous allocations, hence MAX_ORDER must be at least 12.
+config FORCE_MAX_ZONEORDER
+ int
+ default "12" if XEN_PHYSDEV_ACCESS
+ default "11" if !XEN_PHYSDEV_ACCESS
#config VT
# bool
#
CONFIG_XEN_PRIVILEGED_GUEST=y
CONFIG_XEN_PHYSDEV_ACCESS=y
+CONFIG_FORCE_MAX_ZONEORDER=12
CONFIG_X86=y
# CONFIG_X86_64 is not set
#
# CONFIG_XEN_PRIVILEGED_GUEST is not set
# CONFIG_XEN_PHYSDEV_ACCESS is not set
+CONFIG_FORCE_MAX_ZONEORDER=11
CONFIG_X86=y
# CONFIG_X86_64 is not set
-obj-y += block/
+obj-y += blkfront/
obj-y += console/
obj-y += evtchn/
obj-y += netfront/
obj-y += privcmd/
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += blkback/
obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += netback/
+
--- /dev/null
+
+obj-y := blkback.o control.o interface.o vbd.o
--- /dev/null
+/******************************************************************************
+ * drivers/xen/blkback/blkback.c
+ *
+ * Back-end of the driver for virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+ * drivers/xen/blkfront
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ *
+ * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+
+/*
+ * NB. We place a page of padding between each buffer page to avoid incorrect
+ * merging of requests by the IDE and SCSI merging routines. Otherwise, two
+ * adjacent buffers in a scatter-gather request would have adjacent page
+ * numbers: since the merge routines don't realise that this is in *pseudophys*
+ * space, not real space, they may collapse the s-g elements!
+ */
+static unsigned long mmap_vstart;
+#define MMAP_PAGES_PER_REQUEST \
+ (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * 2 * PAGE_SIZE))
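+/*
+ * Layout sketch (illustrative, assuming 4KB pages and an 11-segment
+ * maximum): each request owns 24 virtual pages -- 12 buffer pages
+ * interleaved with 12 padding pages -- so, for example,
+ * MMAP_VADDR(1, 2) == mmap_vstart + (24 + 4) * PAGE_SIZE,
+ * and no two mapped buffer pages are ever virtually adjacent.
+ */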
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+ blkif_t *blkif;
+ unsigned long id;
+ int nr_pages;
+ atomic_t pendcnt;
+ unsigned short operation;
+ int status;
+} pending_req_t;
+
+/*
+ * We can't allocate pending_req's in order, since they may complete out of
+ * order. We therefore maintain an allocation ring. This ring also indicates
+ * when enough work has been passed down -- at that point the allocation ring
+ * will be empty.
+ */
+static pending_req_t pending_reqs[MAX_PENDING_REQS];
+static unsigned char pending_ring[MAX_PENDING_REQS];
+static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+/* NB. We use a different index type to differentiate from shared blk rings. */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
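+/*
+ * Illustrative ring state: initially pending_prod == MAX_PENDING_REQS
+ * and pending_cons == 0, so NR_PENDING_REQS == 0 and every slot index
+ * sits in pending_ring[]. Allocation advances pending_cons; completion
+ * returns a slot at pending_prod. Both indices free-run, so the
+ * unsigned subtraction stays correct across 2^32 wraparound.
+ */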
+
+#if 0
+static kmem_cache_t *buffer_head_cachep;
+#endif
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
+static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
+static void make_response(blkif_t *blkif, unsigned long id,
+ unsigned short op, int st);
+
+static void fast_flush_area(int idx, int nr_pages)
+{
+ multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+ int i;
+
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ mcl[i].op = __HYPERVISOR_update_va_mapping;
+ mcl[i].args[0] = MMAP_VADDR(idx, i) >> PAGE_SHIFT;
+ mcl[i].args[1] = 0;
+ mcl[i].args[2] = 0;
+ }
+
+ mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
+ if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+ BUG();
+}
+
+
+/******************************************************************
+ * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+ return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+ unsigned long flags;
+ if ( !__on_blkdev_list(blkif) ) return;
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if ( __on_blkdev_list(blkif) )
+ {
+ list_del(&blkif->blkdev_list);
+ blkif->blkdev_list.next = NULL;
+ blkif_put(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+ unsigned long flags;
+ if ( __on_blkdev_list(blkif) ) return;
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+ {
+ list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+ blkif_get(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int blkio_schedule(void *arg)
+{
+ DECLARE_WAITQUEUE(wq, current);
+
+ blkif_t *blkif;
+ struct list_head *ent;
+
+ for ( ; ; )
+ {
+ /* Wait for work to do. */
+ add_wait_queue(&blkio_schedule_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
+ list_empty(&blkio_schedule_list) )
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&blkio_schedule_wait, &wq);
+
+ /* Queue up a batch of requests. */
+ while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+ !list_empty(&blkio_schedule_list) )
+ {
+ ent = blkio_schedule_list.next;
+ blkif = list_entry(ent, blkif_t, blkdev_list);
+ blkif_get(blkif);
+ remove_from_blkdev_list(blkif);
+ if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+ add_to_blkdev_list_tail(blkif);
+ blkif_put(blkif);
+ }
+
+#if 0 /* XXXcl tq */
+ /* Push the batch through to disc. */
+ run_task_queue(&tq_disk);
+#endif
+ }
+}
+
+static void maybe_trigger_blkio_schedule(void)
+{
+ /*
+ * Needed so that two processes that together make the following predicate
+ * true don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
+ if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&blkio_schedule_list) )
+ wake_up(&blkio_schedule_wait);
+}
+
+
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
+{
+ unsigned long flags;
+
+ /* An error fails the entire request. */
+ if ( !uptodate )
+ {
+ DPRINTK("Buffer not up-to-date at end of operation\n");
+ pending_req->status = BLKIF_RSP_ERROR;
+ }
+
+ if ( atomic_dec_and_test(&pending_req->pendcnt) )
+ {
+ int pending_idx = pending_req - pending_reqs;
+ fast_flush_area(pending_idx, pending_req->nr_pages);
+ make_response(pending_req->blkif, pending_req->id,
+ pending_req->operation, pending_req->status);
+ blkif_put(pending_req->blkif);
+ spin_lock_irqsave(&pend_prod_lock, flags);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ spin_unlock_irqrestore(&pend_prod_lock, flags);
+ maybe_trigger_blkio_schedule();
+ }
+}
+
+static int end_block_io_op(struct bio *bio, unsigned int done, int error)
+{
+ /* Defer completion until the whole bio has been accounted for. */
+ if ( bio->bi_size != 0 )
+ return 1;
+ __end_block_io_op(bio->bi_private, !error);
+ bio_put(bio);
+ return 0;
+}
+
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ blkif_t *blkif = dev_id;
+ add_to_blkdev_list_tail(blkif);
+ maybe_trigger_blkio_schedule();
+ return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
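+/*
+ * Request-window sketch (illustrative, assuming BLKIF_RING_SIZE == 64):
+ * if blk_resp_prod == 10 and the guest's req_prod == 80, the consume
+ * loop in do_block_io_op stops at i == 74, leaving the remainder until
+ * responses free up ring slots.
+ */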
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+ blkif_ring_t *blk_ring = blkif->blk_ring_base;
+ blkif_request_t *req;
+ BLKIF_RING_IDX i;
+ int more_to_do = 0;
+
+ /* Take items off the comms ring, taking care not to overflow the response space. */
+ for ( i = blkif->blk_req_cons;
+ (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) !=
+ BLKIF_RING_SIZE);
+ i++ )
+ {
+ if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
+ {
+ more_to_do = 1;
+ break;
+ }
+
+ req = &blk_ring->ring[MASK_BLKIF_IDX(i)].req;
+ switch ( req->operation )
+ {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ dispatch_rw_block_io(blkif, req);
+ break;
+
+ case BLKIF_OP_PROBE:
+ dispatch_probe(blkif, req);
+ break;
+
+ default:
+ DPRINTK("error: unknown block io operation [%d]\n",
+ blk_ring->ring[i].req.operation);
+ make_response(blkif, blk_ring->ring[i].req.id,
+ blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR);
+ break;
+ }
+ }
+
+ blkif->blk_req_cons = i;
+ return more_to_do;
+}
+
+static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
+{
+ int rsp = BLKIF_RSP_ERROR;
+ int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+ /* We expect one buffer only. */
+ if ( unlikely(req->nr_segments != 1) )
+ goto out;
+
+ /* Make sure the buffer covers exactly one page: 512-byte sectors 0..7. */
+ if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
+ (blkif_last_sect(req->frame_and_sects[0]) != 7) )
+ goto out;
+
+ if ( HYPERVISOR_update_va_mapping_otherdomain(
+ MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT,
+ (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
+ 0, blkif->domid) )
+ goto out;
+
+ rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
+ PAGE_SIZE / sizeof(vdisk_t));
+
+ out:
+ fast_flush_area(pending_idx, 1);
+ make_response(blkif, req->id, req->operation, rsp);
+}
+
+static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
+{
+ extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
+#if 0
+ struct buffer_head *bh;
+#else
+ struct bio *bio;
+#endif
+ int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
+ short nr_sects;
+ unsigned long buffer, fas;
+ int i, j, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ pending_req_t *pending_req;
+ unsigned long remap_prot;
+ multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+
+ /* We map virtual scatter/gather segments to physical segments. */
+ int new_segs, nr_psegs = 0;
+ phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST + 1];
+
+ /* Check that number of segments is sane. */
+ if ( unlikely(req->nr_segments == 0) ||
+ unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
+ {
+ DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
+ goto bad_descriptor;
+ }
+
+ /*
+ * Check each address/size pair is sane, and convert into a
+ * physical device and block offset. Note that if the offset and size
+ * crosses a virtual extent boundary, we may end up with more
+ * physical scatter/gather segments than virtual segments.
+ */
+ for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
+ {
+ fas = req->frame_and_sects[i];
+ buffer = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
+ nr_sects = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
+
+ if ( nr_sects <= 0 )
+ goto bad_descriptor;
+
+ phys_seg[nr_psegs].ps_device = req->device;
+ phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
+ phys_seg[nr_psegs].buffer = buffer;
+ phys_seg[nr_psegs].nr_sects = nr_sects;
+
+ /* Translate the request into the relevant 'physical device' */
+ new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
+ if ( new_segs < 0 )
+ {
+ DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
+ operation == READ ? "read" : "write",
+ req->sector_number + tot_sects,
+ req->sector_number + tot_sects + nr_sects,
+ req->device);
+ goto bad_descriptor;
+ }
+
+ nr_psegs += new_segs;
+ ASSERT(nr_psegs <= (BLKIF_MAX_SEGMENTS_PER_REQUEST+1));
+ }
+
+ /* Nonsensical zero-sized request? */
+ if ( unlikely(nr_psegs == 0) )
+ goto bad_descriptor;
+
+ if ( operation == READ )
+ remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
+ else
+ remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;
+
+ for ( i = 0; i < nr_psegs; i++ )
+ {
+ mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
+ mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT;
+ mcl[i].args[1] = (phys_seg[i].buffer & PAGE_MASK) | remap_prot;
+ mcl[i].args[2] = 0;
+ mcl[i].args[3] = blkif->domid;
+
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+ phys_seg[i].buffer >> PAGE_SHIFT;
+ }
+
+ if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) )
+ BUG();
+
+ for ( i = 0; i < nr_psegs; i++ )
+ {
+ if ( unlikely(mcl[i].args[5] != 0) )
+ {
+ DPRINTK("invalid buffer -- could not remap it\n");
+ fast_flush_area(pending_idx, nr_psegs);
+ goto bad_descriptor;
+ }
+ }
+
+ pending_req = &pending_reqs[pending_idx];
+ pending_req->blkif = blkif;
+ pending_req->id = req->id;
+ pending_req->operation = operation;
+ pending_req->status = BLKIF_RSP_OKAY;
+ pending_req->nr_pages = nr_psegs;
+ atomic_set(&pending_req->pendcnt, nr_psegs);
+ pending_cons++;
+
+ blkif_get(blkif);
+
+ /* Now we pass each segment down to the real blkdev layer. */
+#if 0
+ for ( i = 0; i < nr_psegs; i++ )
+ {
+ bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
+ if ( unlikely(bh == NULL) )
+ {
+ __end_block_io_op(pending_req, 0);
+ continue; /* XXXcl continue!? */
+ }
+ memset(bh, 0, sizeof (struct buffer_head));
+
+ init_waitqueue_head(&bh->b_wait);
+ bh->b_size = phys_seg[i].nr_sects << 9;
+ bh->b_dev = phys_seg[i].dev;
+ bh->b_rdev = phys_seg[i].dev;
+ bh->b_rsector = (unsigned long)phys_seg[i].sector_number;
+ bh->b_data = (char *)MMAP_VADDR(pending_idx, i) +
+ (phys_seg[i].buffer & ~PAGE_MASK);
+ bh->b_page = virt_to_page(MMAP_VADDR(pending_idx, i));
+ bh->b_end_io = end_block_io_op;
+ bh->b_private = pending_req;
+
+ bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) |
+ (1 << BH_Req) | (1 << BH_Launder);
+ if ( operation == WRITE )
+ bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
+
+ atomic_set(&bh->b_count, 1);
+
+ /* Dispatch a single request. We'll flush it to disc later. */
+ generic_make_request(operation, bh);
+ }
+#else
+ for ( i = 0; i < nr_psegs; i++ )
+ {
+ int nr_iovecs = PFN_UP(phys_seg[i].nr_sects << 9);
+ ASSERT(nr_iovecs == 1);
+ bio = bio_alloc(GFP_ATOMIC, nr_iovecs);
+ if ( unlikely(bio == NULL) )
+ {
+ __end_block_io_op(pending_req, 0);
+ break;
+ }
+ bio->bi_bdev = phys_seg[i].ps_bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = phys_seg[i].sector_number;
+ bio->bi_rw = operation;
+
+ bio->bi_size = 0;
+
+ for ( j = 0; j < nr_iovecs; j++ )
+ {
+ struct bio_vec *bv = bio_iovec_idx(bio, j);
+
+ bv->bv_page = virt_to_page(MMAP_VADDR(pending_idx, i));
+ bv->bv_len = phys_seg[i].nr_sects << 9;
+ bv->bv_offset = phys_seg[i].buffer & ~PAGE_MASK;
+
+ bio->bi_size += bv->bv_len;
+ bio->bi_vcnt++;
+ }
+
+ submit_bio(operation, bio);
+ }
+#endif
+
+ return;
+
+ bad_descriptor:
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+}
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+static void make_response(blkif_t *blkif, unsigned long id,
+ unsigned short op, int st)
+{
+ blkif_response_t *resp;
+ unsigned long flags;
+
+ /* Place on the response ring for the relevant domain. */
+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+ resp = &blkif->blk_ring_base->
+ ring[MASK_BLKIF_IDX(blkif->blk_resp_prod)].resp;
+ resp->id = id;
+ resp->operation = op;
+ resp->status = st;
+ wmb();
+ blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+ /* Kick the relevant domain. */
+ notify_via_evtchn(blkif->evtchn);
+}
+
+void blkif_deschedule(blkif_t *blkif)
+{
+ remove_from_blkdev_list(blkif);
+}
+
+static int __init blkif_init(void)
+{
+ int i;
+
+ if ( !(start_info.flags & SIF_INITDOMAIN)
+ && !(start_info.flags & SIF_BLK_BE_DOMAIN) )
+ return 0;
+
+ blkif_interface_init();
+
+ if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
+ BUG();
+
+ pending_cons = 0;
+ pending_prod = MAX_PENDING_REQS;
+ memset(pending_reqs, 0, sizeof(pending_reqs));
+ for ( i = 0; i < MAX_PENDING_REQS; i++ )
+ pending_ring[i] = i;
+
+ spin_lock_init(&blkio_schedule_list_lock);
+ INIT_LIST_HEAD(&blkio_schedule_list);
+
+ if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+ BUG();
+
+#if 0
+ buffer_head_cachep = kmem_cache_create(
+ "buffer_head_cache", sizeof(struct buffer_head),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+#endif
+
+ blkif_ctrlif_init();
+
+ return 0;
+}
+
+__initcall(blkif_init);
--- /dev/null
+/******************************************************************************
+ * drivers/xen/blkback/common.h
+ */
+
+#ifndef __BLKIF__BACKEND__COMMON_H__
+#define __BLKIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <asm-xen/ctrl_if.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#include <asm-xen/blkif.h>
+#else
+#include "../blkif.h"
+#define irqreturn_t void
+#define IRQ_HANDLED
+#endif
+
+#if 0
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#else
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+typedef struct blkif_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned long shmem_frame;
+ unsigned int evtchn;
+ int irq;
+ /* Comms information. */
+ blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
+ BLKIF_RING_IDX blk_req_cons; /* Request consumer. */
+ BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */
+ /* VBDs attached to this interface. */
+ struct rb_root vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */
+ spinlock_t vbd_lock; /* Protects VBD mapping. */
+ /* Private fields. */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */
+ u8 disconnect_rspid;
+ struct blkif_st *hash_next;
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
+} blkif_t;
+
+void blkif_create(blkif_be_create_t *create);
+void blkif_destroy(blkif_be_destroy_t *destroy);
+void blkif_connect(blkif_be_connect_t *connect);
+int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
+void __blkif_disconnect_complete(blkif_t *blkif);
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define blkif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ __blkif_disconnect_complete(_b); \
+ } while (0)
+
+/* An entry in a list of xen_extents. */
+typedef struct _blkif_extent_le {
+ blkif_extent_t extent; /* an individual extent */
+ struct _blkif_extent_le *next; /* and a pointer to the next */
+ struct block_device *bdev;
+} blkif_extent_le_t;
+
+typedef struct _vbd {
+ blkif_vdev_t vdevice; /* what the domain refers to this vbd as */
+ unsigned char readonly; /* Non-zero -> read-only */
+ unsigned char type; /* VDISK_TYPE_xxx */
+ blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */
+ struct rb_node rb; /* for linking into R-B tree lookup struct */
+} vbd_t;
+
+void vbd_create(blkif_be_vbd_create_t *create);
+void vbd_grow(blkif_be_vbd_grow_t *grow);
+void vbd_shrink(blkif_be_vbd_shrink_t *shrink);
+void vbd_destroy(blkif_be_vbd_destroy_t *delete);
+int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
+void destroy_all_vbds(blkif_t *blkif);
+
+/* Describes a [partial] disk extent (part of a block io request) */
+typedef struct {
+ union {
+ unsigned short dev;
+ struct block_device *bdev;
+ } _dev;
+ unsigned short nr_sects;
+ unsigned long buffer;
+ blkif_sector_t sector_number;
+} phys_seg_t;
+#define ps_device _dev.dev
+#define ps_bdev _dev.bdev
+
+int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation);
+
+void blkif_interface_init(void);
+void blkif_ctrlif_init(void);
+
+void blkif_deschedule(blkif_t *blkif);
+
+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+#endif /* __BLKIF__BACKEND__COMMON_H__ */
--- /dev/null
+/******************************************************************************
+ * drivers/xen/blkback/control.c
+ *
+ * Routines for interfacing with the control plane.
+ *
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype);
+
+ switch ( msg->subtype )
+ {
+ case CMSG_BLKIF_BE_CREATE:
+ if ( msg->length != sizeof(blkif_be_create_t) )
+ goto parse_error;
+ blkif_create((blkif_be_create_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_DESTROY:
+ if ( msg->length != sizeof(blkif_be_destroy_t) )
+ goto parse_error;
+ blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_CONNECT:
+ if ( msg->length != sizeof(blkif_be_connect_t) )
+ goto parse_error;
+ blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_DISCONNECT:
+ if ( msg->length != sizeof(blkif_be_disconnect_t) )
+ goto parse_error;
+ if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
+ return; /* Sending the response is deferred until later. */
+ break;
+ case CMSG_BLKIF_BE_VBD_CREATE:
+ if ( msg->length != sizeof(blkif_be_vbd_create_t) )
+ goto parse_error;
+ vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_VBD_DESTROY:
+ if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
+ goto parse_error;
+ vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_VBD_GROW:
+ if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
+ goto parse_error;
+ vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_VBD_SHRINK:
+ if ( msg->length != sizeof(blkif_be_vbd_shrink_t) )
+ goto parse_error;
+ vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]);
+ break;
+ default:
+ goto parse_error;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ DPRINTK("Parse error while reading message subtype %d, len %d\n",
+ msg->subtype, msg->length);
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+void blkif_ctrlif_init(void)
+{
+ ctrl_msg_t cmsg;
+ blkif_be_driver_status_changed_t st;
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(blkif_be_driver_status_changed_t);
+ st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
--- /dev/null
+/******************************************************************************
+ * drivers/xen/blkback/interface.c
+ *
+ * Block-device interface management.
+ *
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#endif
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+static kmem_cache_t *blkif_cachep;
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+ blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif != NULL) &&
+ ((blkif->domid != domid) || (blkif->handle != handle)) )
+ blkif = blkif->hash_next;
+ return blkif;
+}
+
+void __blkif_disconnect_complete(blkif_t *blkif)
+{
+ ctrl_msg_t cmsg;
+ blkif_be_disconnect_t disc;
+
+ /*
+ * These can't be done in __blkif_disconnect() because at that point there
+ * may be outstanding requests at the disc whose asynchronous responses
+ * must still be notified to the remote driver.
+ */
+ unbind_evtchn_from_irq(blkif->evtchn);
+ vfree(blkif->blk_ring_base);
+
+ /* Construct the deferred response message. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT;
+ cmsg.id = blkif->disconnect_rspid;
+ cmsg.length = sizeof(blkif_be_disconnect_t);
+ disc.domid = blkif->domid;
+ disc.blkif_handle = blkif->handle;
+ disc.status = BLKIF_BE_STATUS_OKAY;
+ memcpy(cmsg.msg, &disc, sizeof(disc));
+
+ /*
+ * Make sure message is constructed /before/ status change, because
+ * after the status change the 'blkif' structure could be deallocated at
+ * any time. Also make sure we send the response /after/ status change,
+ * as otherwise a subsequent CONNECT request could spuriously fail if
+ * another CPU doesn't see the status change yet.
+ */
+ mb();
+ if ( blkif->status != DISCONNECTING )
+ BUG();
+ blkif->status = DISCONNECTED;
+ mb();
+
+ /* Send the successful response. */
+ ctrl_if_send_response(&cmsg);
+}
+
+void blkif_create(blkif_be_create_t *create)
+{
+ domid_t domid = create->domid;
+ unsigned int handle = create->blkif_handle;
+ blkif_t **pblkif, *blkif;
+
+ if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
+ {
+ DPRINTK("Could not create blkif: out of memory\n");
+ create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ blkif->handle = handle;
+ blkif->status = DISCONNECTED;
+ spin_lock_init(&blkif->vbd_lock);
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 0);
+
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( *pblkif != NULL )
+ {
+ if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+ {
+ DPRINTK("Could not create blkif: already exists\n");
+ create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
+ kmem_cache_free(blkif_cachep, blkif);
+ return;
+ }
+ pblkif = &(*pblkif)->hash_next;
+ }
+
+ blkif->hash_next = *pblkif;
+ *pblkif = blkif;
+
+ DPRINTK("Successfully created blkif\n");
+ create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+void blkif_destroy(blkif_be_destroy_t *destroy)
+{
+ domid_t domid = destroy->domid;
+ unsigned int handle = destroy->blkif_handle;
+ blkif_t **pblkif, *blkif;
+
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif = *pblkif) != NULL )
+ {
+ if ( (blkif->domid == domid) && (blkif->handle == handle) )
+ {
+ if ( blkif->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
+ }
+ pblkif = &blkif->hash_next;
+ }
+
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+
+ still_connected:
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
+ *pblkif = blkif->hash_next;
+ destroy_all_vbds(blkif);
+ kmem_cache_free(blkif_cachep, blkif);
+ destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+void blkif_connect(blkif_be_connect_t *connect)
+{
+ domid_t domid = connect->domid;
+ unsigned int handle = connect->blkif_handle;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long shmem_frame = connect->shmem_frame;
+ struct vm_struct *vma;
+ pgprot_t prot;
+ int error;
+ blkif_t *blkif;
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n",
+ connect->domid, connect->blkif_handle);
+ connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ {
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+ error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+ shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+ prot, domid);
+ if ( error != 0 )
+ {
+ if ( error == -ENOMEM )
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ else if ( error == -EFAULT )
+ connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+ else
+ connect->status = BLKIF_BE_STATUS_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ if ( blkif->status != DISCONNECTED )
+ {
+ connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ vfree(vma->addr);
+ return;
+ }
+
+ blkif->evtchn = evtchn;
+ blkif->irq = bind_evtchn_to_irq(evtchn);
+ blkif->shmem_frame = shmem_frame;
+ blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
+ blkif->status = CONNECTED;
+ blkif_get(blkif);
+
+ request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
+
+ connect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+ domid_t domid = disconnect->domid;
+ unsigned int handle = disconnect->blkif_handle;
+ blkif_t *blkif;
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_disconnect attempted for non-existent blkif"
+ " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle);
+ disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return 1; /* Caller will send response error message. */
+ }
+
+ if ( blkif->status == CONNECTED )
+ {
+ blkif->status = DISCONNECTING;
+ blkif->disconnect_rspid = rsp_id;
+ wmb(); /* Let other CPUs see the status change. */
+ free_irq(blkif->irq, blkif);
+ blkif_deschedule(blkif);
+ blkif_put(blkif);
+ return 0; /* Caller should not send response message. */
+ }
+
+ disconnect->status = BLKIF_BE_STATUS_OKAY;
+ return 1;
+}
+
+void __init blkif_interface_init(void)
+{
+ blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
+ 0, 0, NULL, NULL);
+ memset(blkif_hash, 0, sizeof(blkif_hash));
+}
--- /dev/null
+/******************************************************************************
+ * drivers/xen/blkback/vbd.c
+ *
+ * Routines for managing virtual block devices (VBDs).
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+static dev_t vbd_map_devnum(blkif_pdev_t);
+
+void vbd_create(blkif_be_vbd_create_t *create)
+{
+ vbd_t *vbd;
+ struct rb_node **rb_p, *rb_parent = NULL;
+ blkif_t *blkif;
+ blkif_vdev_t vdevice = create->vdevice;
+
+ blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n",
+ create->domid, create->blkif_handle);
+ create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ spin_lock(&blkif->vbd_lock);
+
+ rb_p = &blkif->vbd_rb.rb_node;
+ while ( *rb_p != NULL )
+ {
+ rb_parent = *rb_p;
+ vbd = rb_entry(rb_parent, vbd_t, rb);
+ if ( vdevice < vbd->vdevice )
+ {
+ rb_p = &rb_parent->rb_left;
+ }
+ else if ( vdevice > vbd->vdevice )
+ {
+ rb_p = &rb_parent->rb_right;
+ }
+ else
+ {
+ DPRINTK("vbd_create attempted for already existing vbd\n");
+ create->status = BLKIF_BE_STATUS_VBD_EXISTS;
+ goto out;
+ }
+ }
+
+ if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
+ {
+ DPRINTK("vbd_create: out of memory\n");
+ create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ goto out;
+ }
+
+ vbd->vdevice = vdevice;
+ vbd->readonly = create->readonly;
+ vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
+ vbd->extents = NULL;
+
+ rb_link_node(&vbd->rb, rb_parent, rb_p);
+ rb_insert_color(&vbd->rb, &blkif->vbd_rb);
+
+ DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
+ vdevice, create->domid);
+ create->status = BLKIF_BE_STATUS_OKAY;
+
+ out:
+ spin_unlock(&blkif->vbd_lock);
+}
+
+
+/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
+void vbd_grow(blkif_be_vbd_grow_t *grow)
+{
+ blkif_t *blkif;
+ blkif_extent_le_t **px, *x;
+ vbd_t *vbd = NULL;
+ struct rb_node *rb;
+ blkif_vdev_t vdevice = grow->vdevice;
+#if 0
+ unsigned long sz;
+#endif
+
+ blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("vbd_grow attempted for non-existent blkif (%u,%u)\n",
+ grow->domid, grow->blkif_handle);
+ grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ spin_lock(&blkif->vbd_lock);
+
+ rb = blkif->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( vdevice < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( vdevice > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ break;
+ }
+
+ if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
+ {
+ DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
+ grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+ goto out;
+ }
+
+ if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t),
+ GFP_KERNEL)) == NULL) )
+ {
+ DPRINTK("vbd_grow: out of memory\n");
+ grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ goto out;
+ }
+
+ x->extent.device = grow->extent.device;
+ /* XXXcl see comments at top of open_by_devnum */
+#if 01
+ x->bdev = open_by_devnum(vbd_map_devnum(x->extent.device),
+ vbd->readonly ? FMODE_READ : FMODE_WRITE);
+#endif
+ /* XXXcl maybe bd_claim? */
+ x->extent.sector_start = grow->extent.sector_start;
+ x->extent.sector_length = grow->extent.sector_length;
+ x->next = (blkif_extent_le_t *)NULL;
+
+#if 0
+ if( !blk_size[MAJOR(x->extent.device)] )
+ {
+ DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
+ grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ goto out;
+ }
+
+ /* convert blocks (1KB) to sectors */
+ sz = blk_size[MAJOR(x->extent.device)][MINOR(x->extent.device)] * 2;
+#endif
+
+ if ( x->extent.sector_start > 0 )
+ {
+ DPRINTK("vbd_grow: device %08x start not zero!\n", x->extent.device);
+ grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ kfree(x); /* don't leak the not-yet-linked extent */
+ goto out;
+ }
+
+#if 0
+ /*
+ * NB. This test assumes sector_start == 0, which is always the case
+ * in Xen 1.3. In fact the whole grow/shrink interface could do with
+ * some simplification.
+ */
+ if ( x->extent.sector_length > sz )
+ x->extent.sector_length = sz;
+
+ DPRINTK("vbd_grow: requested_len %llu actual_len %lu\n",
+ x->extent.sector_length, sz);
+#endif
+
+ for ( px = &vbd->extents; *px != NULL; px = &(*px)->next )
+ continue;
+
+ *px = x;
+
+ DPRINTK("Successful grow of vdev=%04x (dom=%u)\n",
+ vdevice, grow->domid);
+
+ grow->status = BLKIF_BE_STATUS_OKAY;
+
+ out:
+ spin_unlock(&blkif->vbd_lock);
+}
+
+
+void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
+{
+ blkif_t *blkif;
+ blkif_extent_le_t **px, *x;
+ vbd_t *vbd = NULL;
+ struct rb_node *rb;
+ blkif_vdev_t vdevice = shrink->vdevice;
+
+ blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("vbd_shrink attempted for non-existent blkif (%u,%u)\n",
+ shrink->domid, shrink->blkif_handle);
+ shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ spin_lock(&blkif->vbd_lock);
+
+ rb = blkif->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( vdevice < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( vdevice > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ break;
+ }
+
+ if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
+ {
+ shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+ goto out;
+ }
+
+ if ( unlikely(vbd->extents == NULL) )
+ {
+ shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ goto out;
+ }
+
+ /* Find the last extent. We now know that there is at least one. */
+ for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
+ continue;
+
+ x = *px;
+ *px = x->next;
+ kfree(x);
+
+ shrink->status = BLKIF_BE_STATUS_OKAY;
+
+ out:
+ spin_unlock(&blkif->vbd_lock);
+}
+
+
+void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
+{
+ blkif_t *blkif;
+ vbd_t *vbd;
+ struct rb_node *rb;
+ blkif_extent_le_t *x, *t;
+ blkif_vdev_t vdevice = destroy->vdevice;
+
+ blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
+ destroy->domid, destroy->blkif_handle);
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ spin_lock(&blkif->vbd_lock);
+
+ rb = blkif->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( vdevice < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( vdevice > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ goto found;
+ }
+
+ destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+ goto out;
+
+ found:
+ rb_erase(rb, &blkif->vbd_rb);
+ x = vbd->extents;
+ kfree(vbd);
+
+ while ( x != NULL )
+ {
+ t = x->next;
+ kfree(x);
+ x = t;
+ }
+
+ out:
+ spin_unlock(&blkif->vbd_lock);
+}
+
+
+void destroy_all_vbds(blkif_t *blkif)
+{
+ vbd_t *vbd;
+ struct rb_node *rb;
+ blkif_extent_le_t *x, *t;
+
+ spin_lock(&blkif->vbd_lock);
+
+ while ( (rb = blkif->vbd_rb.rb_node) != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+
+ rb_erase(rb, &blkif->vbd_rb);
+ x = vbd->extents;
+ kfree(vbd);
+
+ while ( x != NULL )
+ {
+ t = x->next;
+ kfree(x);
+ x = t;
+ }
+ }
+
+ spin_unlock(&blkif->vbd_lock);
+}
+
+
+static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd)
+{
+ blkif_extent_le_t *x;
+
+ vbd_info->device = vbd->vdevice;
+ vbd_info->info = vbd->type;
+ if ( vbd->readonly )
+ vbd_info->info |= VDISK_FLAG_RO;
+ vbd_info->capacity = 0ULL;
+ for ( x = vbd->extents; x != NULL; x = x->next )
+ vbd_info->capacity += x->extent.sector_length;
+
+ return 0;
+}
+
+
+int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
+{
+ int rc = 0, nr_vbds = 0;
+ struct rb_node *rb;
+
+ spin_lock(&blkif->vbd_lock);
+
+ if ( (rb = blkif->vbd_rb.rb_node) == NULL )
+ goto out;
+
+ new_subtree:
+ /* STEP 1. Find least node (it'll be left-most). */
+ while ( rb->rb_left != NULL )
+ rb = rb->rb_left;
+
+ for ( ; ; )
+ {
+ /* STEP 2. Dealt with left subtree. Now process current node. */
+ if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds],
+ rb_entry(rb, vbd_t, rb))) != 0 )
+ goto out;
+ if ( ++nr_vbds == max_vbds )
+ goto out;
+
+ /* STEP 3. Process right subtree, if any. */
+ if ( rb->rb_right != NULL )
+ {
+ rb = rb->rb_right;
+ goto new_subtree;
+ }
+
+ /* STEP 4. Done both subtrees. Head back through ancestors. */
+ for ( ; ; )
+ {
+ /* We're done when we get back to the root node. */
+ if ( rb->rb_parent == NULL )
+ goto out;
+ /* If we are left of parent, then parent is next to process. */
+ if ( rb->rb_parent->rb_left == rb )
+ break;
+ /* If we are right of parent, then we climb to grandparent. */
+ rb = rb->rb_parent;
+ }
+
+ rb = rb->rb_parent;
+ }
+
+ out:
+ spin_unlock(&blkif->vbd_lock);
+ return (rc == 0) ? nr_vbds : rc;
+}
+
+
+int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
+{
+ blkif_extent_le_t *x;
+ vbd_t *vbd;
+ struct rb_node *rb;
+ blkif_sector_t sec_off;
+ unsigned long nr_secs;
+
+ spin_lock(&blkif->vbd_lock);
+
+ rb = blkif->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( pseg->ps_device < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( pseg->ps_device > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ goto found;
+ }
+
+ DPRINTK("vbd_translate; domain %u attempted to access "
+ "non-existent VBD.\n", blkif->domid);
+
+ spin_unlock(&blkif->vbd_lock);
+ return -ENODEV;
+
+ found:
+
+ if ( (operation == WRITE) && vbd->readonly )
+ {
+ spin_unlock(&blkif->vbd_lock);
+ return -EACCES;
+ }
+
+ /*
+ * Now iterate through the list of blkif_extents, working out which should
+ * be used to perform the translation.
+ */
+ sec_off = pseg->sector_number;
+ nr_secs = pseg->nr_sects;
+ for ( x = vbd->extents; x != NULL; x = x->next )
+ {
+ if ( sec_off < x->extent.sector_length )
+ {
+#if 0
+ pseg->ps_device = x->extent.device;
+#else
+ pseg->ps_bdev = x->bdev;
+#endif
+ pseg->sector_number = x->extent.sector_start + sec_off;
+ if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) )
+ goto overrun;
+ spin_unlock(&blkif->vbd_lock);
+ return 1;
+ }
+ sec_off -= x->extent.sector_length;
+ }
+
+ DPRINTK("vbd_translate: end of vbd.\n");
+ spin_unlock(&blkif->vbd_lock);
+ return -EACCES;
+
+ /*
+ * Here we deal with overrun onto the following extent. We don't deal with
+ * overrun of more than one boundary since each request is restricted to
+ * 2^9 512-byte sectors, so it should be trivial for control software to
+ * ensure that extents are large enough to prevent excessive overrun.
+ */
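+ /*
+ * Worked example (illustrative): with extents of 100 and 50 sectors,
+ * an 8-sector request at offset 96 splits into pseg[0] = 4 sectors at
+ * offset 96 within extent 0 and pseg[1] = 4 sectors at the start of
+ * extent 1, and we return 2.
+ */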
+ overrun:
+
+ /* Adjust length of first chunk to run to end of first extent. */
+ pseg[0].nr_sects = x->extent.sector_length - sec_off;
+
+ /* Set second chunk buffer and length to start where first chunk ended. */
+ pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9);
+ pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
+
+ /* Now move to the next extent. Check it exists and is long enough! */
+ if ( unlikely((x = x->next) == NULL) ||
+ unlikely(x->extent.sector_length < pseg[1].nr_sects) )
+ {
+ DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
+ spin_unlock(&blkif->vbd_lock);
+ return -EACCES;
+ }
+
+ /* Store the real device and start sector for the second chunk. */
+#if 0
+ pseg[1].ps_device = x->extent.device;
+#else
+ pseg[1].ps_bdev = x->bdev;
+#endif
+ pseg[1].sector_number = x->extent.sector_start;
+
+ spin_unlock(&blkif->vbd_lock);
+ return 2;
+}
+
+#define MAJOR_XEN(dev) ((dev)>>8)
+#define MINOR_XEN(dev) ((dev) & 0xff)
+
+#define XEN_IDE0_MAJOR IDE0_MAJOR
+#define XEN_IDE1_MAJOR IDE1_MAJOR
+#define XEN_IDE2_MAJOR IDE2_MAJOR
+#define XEN_IDE3_MAJOR IDE3_MAJOR
+#define XEN_IDE4_MAJOR IDE4_MAJOR
+#define XEN_IDE5_MAJOR IDE5_MAJOR
+#define XEN_IDE6_MAJOR IDE6_MAJOR
+#define XEN_IDE7_MAJOR IDE7_MAJOR
+#define XEN_IDE8_MAJOR IDE8_MAJOR
+#define XEN_IDE9_MAJOR IDE9_MAJOR
+#define XEN_SCSI_DISK0_MAJOR SCSI_DISK0_MAJOR
+#define XEN_SCSI_DISK1_MAJOR SCSI_DISK1_MAJOR
+#define XEN_SCSI_DISK2_MAJOR SCSI_DISK2_MAJOR
+#define XEN_SCSI_DISK3_MAJOR SCSI_DISK3_MAJOR
+#define XEN_SCSI_DISK4_MAJOR SCSI_DISK4_MAJOR
+#define XEN_SCSI_DISK5_MAJOR SCSI_DISK5_MAJOR
+#define XEN_SCSI_DISK6_MAJOR SCSI_DISK6_MAJOR
+#define XEN_SCSI_DISK7_MAJOR SCSI_DISK7_MAJOR
+#define XEN_SCSI_CDROM_MAJOR SCSI_CDROM_MAJOR
+
+static dev_t vbd_map_devnum(blkif_pdev_t cookie)
+{
+ int new_major;
+ int major = MAJOR_XEN(cookie);
+ int minor = MINOR_XEN(cookie);
+
+ switch (major) {
+ case XEN_IDE0_MAJOR: new_major = IDE0_MAJOR; break;
+ case XEN_IDE1_MAJOR: new_major = IDE1_MAJOR; break;
+ case XEN_IDE2_MAJOR: new_major = IDE2_MAJOR; break;
+ case XEN_IDE3_MAJOR: new_major = IDE3_MAJOR; break;
+ case XEN_IDE4_MAJOR: new_major = IDE4_MAJOR; break;
+ case XEN_IDE5_MAJOR: new_major = IDE5_MAJOR; break;
+ case XEN_IDE6_MAJOR: new_major = IDE6_MAJOR; break;
+ case XEN_IDE7_MAJOR: new_major = IDE7_MAJOR; break;
+ case XEN_IDE8_MAJOR: new_major = IDE8_MAJOR; break;
+ case XEN_IDE9_MAJOR: new_major = IDE9_MAJOR; break;
+ case XEN_SCSI_DISK0_MAJOR: new_major = SCSI_DISK0_MAJOR; break;
+ case XEN_SCSI_DISK1_MAJOR ... XEN_SCSI_DISK7_MAJOR:
+ new_major = SCSI_DISK1_MAJOR + major - XEN_SCSI_DISK1_MAJOR;
+ break;
+ case XEN_SCSI_CDROM_MAJOR: new_major = SCSI_CDROM_MAJOR; break;
+ default: new_major = 0; break;
+ }
+
+ return MKDEV(new_major, minor);
+}
--- /dev/null
+
+config XENBLOCK
+ tristate "Block device driver"
+ depends on ARCH_XEN
+ help
+ Block device driver for Xen
--- /dev/null
+
+obj-y := blkfront.o vbd.o
+
--- /dev/null
+/******************************************************************************
+ * blkfront.c
+ *
+ * XenLinux virtual block-device driver.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ */
+
+#include "block.h"
+#include <linux/cdrom.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <scsi/scsi.h>
+#include <asm-xen/ctrl_if.h>
+
+typedef unsigned char byte; /* from linux/ide.h */
+
+#define BLKIF_STATE_CLOSED 0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED 2
+static unsigned int blkif_state = BLKIF_STATE_CLOSED;
+static unsigned int blkif_evtchn, blkif_irq;
+
+static int blkif_control_rsp_valid;
+static blkif_response_t blkif_control_rsp;
+
+static blkif_ring_t *blk_ring;
+static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
+static BLKIF_RING_IDX req_prod; /* Private request producer. */
+
+static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for
+ * recovery. Responses not stored here. */
+static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
+ * recovery */
+static int recovery = 0; /* "Recovery in progress" flag. Protected
+ * by the blkif_io_lock */
+
+/* We plug the I/O ring if the driver is suspended or if the ring is full. */
+#define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
+ (blkif_state != BLKIF_STATE_CONNECTED))
+
+/*
+ * Request queues with outstanding work, but ring is currently full.
+ * We need no special lock here, as we always access this with the
+ * blkif_io_lock held. We only need a small maximum list.
+ */
+#define MAX_PENDING 8
+static request_queue_t *pending_queues[MAX_PENDING];
+static int nr_pending;
+
+static inline void flush_requests(void)
+{
+ wmb(); /* Ensure request bodies are visible before the producer index. */
+ blk_ring->req_prod = req_prod;
+
+ notify_via_evtchn(blkif_evtchn);
+}
+
+
+#if 0
+/*
+ * blkif_update_int/update_vbds_task - handle VBD update events.
+ * Schedule a task for keventd to run, which will update the VBDs and perform
+ * the corresponding updates to our view of VBD state.
+ */
+static struct tq_struct update_tq;
+static void update_vbds_task(void *unused)
+{
+ xlvbd_update_vbds();
+}
+#endif
+
+
+int blkif_open(struct inode *inode, struct file *filep)
+{
+ struct gendisk *gd = inode->i_bdev->bd_disk;
+ struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
+
+ /* Update of usage count is protected by per-device semaphore. */
+ di->mi->usage++;
+
+ return 0;
+}
+
+
+int blkif_release(struct inode *inode, struct file *filep)
+{
+ struct gendisk *gd = inode->i_bdev->bd_disk;
+ struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
+
+ /*
+ * When usage drops to zero it may allow more VBD updates to occur.
+ * Update of usage count is protected by a per-device semaphore.
+ */
+ if (--di->mi->usage == 0) {
+#if 0
+ update_tq.routine = update_vbds_task;
+ schedule_task(&update_tq);
+#endif
+ }
+
+ return 0;
+}
+
+
+int blkif_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument)
+{
+ /* struct gendisk *gd = inode->i_bdev->bd_disk; */
+
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+ command, (long)argument, inode->i_rdev);
+
+ switch (command) {
+
+ case HDIO_GETGEO:
+ /* return ENOSYS to use defaults */
+ return -ENOSYS;
+
+ default:
+ printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
+ command);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+#if 0
+/* check media change: should probably do something here in some cases :-) */
+int blkif_check(kdev_t dev)
+{
+ DPRINTK("blkif_check\n");
+ return 0;
+}
+
+int blkif_revalidate(kdev_t dev)
+{
+ struct block_device *bd;
+ struct gendisk *gd;
+ xen_block_t *disk;
+ unsigned long capacity;
+ int i, rc = 0;
+
+ if ( (bd = bdget(dev)) == NULL )
+ return -EINVAL;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((gd = get_gendisk(dev)) == NULL) ||
+ ((disk = xldev_to_xldisk(dev)) == NULL) ||
+ ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if ( disk->usage > 1 )
+ {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ /* Only reread partition table if VBDs aren't mapped to partitions. */
+ if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
+ {
+ for ( i = gd->max_p - 1; i >= 0; i-- )
+ {
+ invalidate_device(dev+i, 1);
+ gd->part[MINOR(dev+i)].start_sect = 0;
+ gd->part[MINOR(dev+i)].nr_sects = 0;
+ gd->sizes[MINOR(dev+i)] = 0;
+ }
+
+ grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
+ }
+
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+#endif
+
+
+/*
+ * blkif_queue_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: BLKIF_OP_{READ,WRITE,PROBE}
+ * buffer: buffer to read/write into. This should be a
+ * virtual address in the guest OS.
+ */
+static int blkif_queue_request(struct request *req)
+{
+ struct xlbd_disk_info *di =
+ (struct xlbd_disk_info *)req->rq_disk->private_data;
+ unsigned long buffer_ma;
+ blkif_request_t *ring_req;
+ struct bio *bio;
+ struct bio_vec *bvec;
+ int idx, s;
+ unsigned int fsect, lsect;
+
+ if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
+ return 1;
+
+ /* Fill out a communications ring structure. */
+ ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
+ ring_req->id = (unsigned long)req;
+ ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
+ BLKIF_OP_READ;
+ ring_req->sector_number = (blkif_sector_t)req->sector;
+ ring_req->device = di->xd_device;
+
+ s = 0;
+ ring_req->nr_segments = 0;
+ rq_for_each_bio(bio, req) {
+ bio_for_each_segment(bvec, bio, idx) {
+ buffer_ma =
+ phys_to_machine(page_to_phys(bvec->bv_page));
+ if (unlikely((buffer_ma & ((1<<9)-1)) != 0))
+ BUG();
+
+ fsect = bvec->bv_offset >> 9;
+ lsect = fsect + (bvec->bv_len >> 9) - 1;
+ if (unlikely(lsect > 7))
+ BUG();
+
+ ring_req->frame_and_sects[ring_req->nr_segments++] =
+ buffer_ma | (fsect << 3) | lsect;
+ s += bvec->bv_len >> 9;
+ }
+ }
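+ /*
+ * Encoding note (illustrative): each frame_and_sects entry packs the
+ * machine frame address with (first_sect << 3) | last_sect in its low
+ * six bits, so a full 4KB page of 512-byte sectors encodes as ma | 0x07.
+ */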
+
+ req_prod++;
+
+ /* Keep a private copy so we can reissue requests when recovering. */
+ blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req =
+ *ring_req;
+ blk_ring_rec->req_prod++;
+
+ return 0;
+}
+
+/*
+ * do_blkif_request
+ * read a block; request is in a request queue
+ */
+void do_blkif_request(request_queue_t *rq)
+{
+ struct request *req;
+ int queued;
+
+ DPRINTK("Entered do_blkif_request\n");
+
+ queued = 0;
+
+ while ((req = elv_next_request(rq)) != NULL) {
+ if (!blk_fs_request(req)) {
+ end_request(req, 0);
+ continue;
+ }
+
+ if (BLKIF_RING_FULL) {
+ blk_stop_queue(rq);
+ break;
+ }
+ DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
+ req, req->cmd, req->sector, req->current_nr_sectors,
+ req->nr_sectors, req->buffer,
+ rq_data_dir(req) ? "write" : "read");
+ blkdev_dequeue_request(req);
+ if (blkif_queue_request(req)) {
+ blk_stop_queue(rq);
+ break;
+ }
+ queued++;
+ }
+
+ if (queued != 0)
+ flush_requests();
+}
+
+
+static void kick_pending_request_queues(void)
+{
+ /* We kick pending request queues if the ring is reasonably empty. */
+ if ( (nr_pending != 0) &&
+ ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
+ {
+ /* Attempt to drain the queue, but bail if the ring becomes full. */
+ while ( (nr_pending != 0) && !BLKIF_RING_FULL )
+ do_blkif_request(pending_queues[--nr_pending]);
+ }
+}
+
+
+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ struct request *req;
+ blkif_response_t *bret;
+ BLKIF_RING_IDX i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ if (unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery)) {
+ printk("Bailed out\n");
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ for (i = resp_cons; i != blk_ring->resp_prod; i++) {
+ bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
+ switch (bret->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ if (unlikely(bret->status != BLKIF_RSP_OKAY))
+ DPRINTK("Bad return from blkdev data request: %lx\n",
+ bret->status);
+ req = (struct request *)bret->id;
+ /* XXXcl pass up status */
+ if (unlikely(end_that_request_first(req, 1,
+ req->hard_nr_sectors)))
+ BUG();
+
+ end_that_request_last(req);
+ break;
+ case BLKIF_OP_PROBE:
+ memcpy(&blkif_control_rsp, bret, sizeof(*bret));
+ blkif_control_rsp_valid = 1;
+ break;
+ default:
+ BUG();
+ }
+ }
+
+ resp_cons = i;
+ resp_cons_rec = i;
+
+ if (xlbd_blk_queue &&
+ test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags)) {
+ blk_start_queue(xlbd_blk_queue);
+ /* XXXcl call to request_fn should not be needed but
+ * we get stuck without... needs investigating
+ */
+ xlbd_blk_queue->request_fn(xlbd_blk_queue);
+ }
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+
+void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
+{
+ unsigned long flags;
+
+ retry:
+ while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+ if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
+ {
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+ goto retry;
+ }
+
+ memcpy(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req, req, sizeof(*req));
+ memcpy(&blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,
+ req, sizeof(*req));
+ req_prod++;
+ flush_requests();
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ while ( !blkif_control_rsp_valid )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
+ blkif_control_rsp_valid = 0;
+}
+
+
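+/*
+ * State-machine summary (inferred from the handler below): the frontend
+ * moves CLOSED -> DISCONNECTED on a DISCONNECTED message (allocating a
+ * fresh shared ring and asking the controller to connect it), then
+ * DISCONNECTED -> CONNECTED once the event channel arrives. A
+ * DISCONNECTED message in any other state triggers recovery: requests
+ * saved in blk_ring_rec are reissued on the new ring.
+ */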
+static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
+{
+ ctrl_msg_t cmsg;
+ blkif_fe_interface_connect_t up;
+
+ if ( status->handle != 0 )
+ {
+ printk(KERN_WARNING "Status change on unsupported blkif %d\n",
+ status->handle);
+ return;
+ }
+
+ switch ( status->status )
+ {
+ case BLKIF_INTERFACE_STATUS_DESTROYED:
+ printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
+ blkif_state);
+ break;
+
+ case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+ if ( blkif_state != BLKIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
+ " in state %d\n", blkif_state);
+
+ printk(KERN_INFO "VBD driver recovery in progress\n");
+
+ /* Prevent new requests being issued until we fix things up. */
+ spin_lock_irq(&blkif_io_lock);
+ recovery = 1;
+ blkif_state = BLKIF_STATE_DISCONNECTED;
+ spin_unlock_irq(&blkif_io_lock);
+
+ /* Free resources associated with old device channel. */
+ free_page((unsigned long)blk_ring);
+ free_irq(blkif_irq, NULL);
+ unbind_evtchn_from_irq(blkif_evtchn);
+ }
+
+ /* Move from CLOSED to DISCONNECTED state. */
+ blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
+ blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+ blkif_state = BLKIF_STATE_DISCONNECTED;
+
+ /* Construct an interface-CONNECT message for the domain controller. */
+ cmsg.type = CMSG_BLKIF_FE;
+ cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT;
+ cmsg.length = sizeof(blkif_fe_interface_connect_t);
+ up.handle = 0;
+ up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
+ memcpy(cmsg.msg, &up, sizeof(up));
+
+ /* Tell the controller to bring up the interface. */
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CONNECTED:
+ if ( blkif_state == BLKIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
+ " in state %d\n", blkif_state);
+ break;
+ }
+
+ blkif_evtchn = status->evtchn;
+ blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+ (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
+
+ if ( recovery )
+ {
+ int i;
+
+ /* Shouldn't need the blkif_io_lock here - the device is
+ * plugged and the recovery flag prevents the interrupt handler
+ * changing anything. */
+
+ /* Reissue requests from the private block ring. */
+ for ( i = 0;
+ resp_cons_rec < blk_ring_rec->req_prod;
+ resp_cons_rec++, i++ )
+ {
+ blk_ring->ring[i].req
+ = blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req;
+ }
+
+ /* Reset the private block ring to match the new ring. */
+ memcpy(blk_ring_rec, blk_ring, sizeof(*blk_ring));
+ resp_cons_rec = 0;
+
+ /* blk_ring->req_prod will be set when we flush_requests().*/
+ blk_ring_rec->req_prod = req_prod = i;
+
+ wmb();
+
+ /* Switch off recovery mode, using a memory barrier to ensure that
+ * it's seen before we flush requests - we don't want to miss any
+ * interrupts. */
+ recovery = 0;
+ wmb();
+
+ /* Kicks things back into life. */
+ flush_requests();
+ }
+ else
+ {
+ /* Probe for discs that are attached to the interface. */
+ xlvbd_init();
+ }
+
+ blkif_state = BLKIF_STATE_CONNECTED;
+
+ /* Kick pending requests. */
+ spin_lock_irq(&blkif_io_lock);
+ kick_pending_request_queues();
+ spin_unlock_irq(&blkif_io_lock);
+
+ break;
+
+ default:
+ printk(KERN_WARNING "Status change to unknown value %d\n",
+ status->status);
+ break;
+ }
+}
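
In summary, the handler above drives a small state machine:

    CLOSED ------DISCONNECTED msg-----> DISCONNECTED
        (shared ring page allocated, interface-CONNECT sent back)
    DISCONNECTED ---CONNECTED msg-----> CONNECTED
        (event channel bound; discs probed on first connect, or the
        private ring replayed if recovering)
    CONNECTED ----DISCONNECTED msg----> DISCONNECTED
        (backend restart: recovery flagged, old ring and irq freed,
        then re-connect as above)

A CONNECTED message arriving in state CLOSED, and any DESTROYED message, are logged and ignored.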
+
+
+static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->subtype )
+ {
+ case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
+ if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
+ goto parse_error;
+ blkif_status_change((blkif_fe_interface_status_changed_t *)
+ &msg->msg[0]);
+ break;
+#if 0
+ case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
+ update_tq.routine = update_vbds_task;
+ schedule_task(&update_tq);
+ break;
+#endif
+ default:
+ goto parse_error;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+
+int __init xlblk_init(void)
+{
+ ctrl_msg_t cmsg;
+ blkif_fe_driver_status_changed_t st;
+
+ if ( (start_info.flags & SIF_INITDOMAIN)
+ || (start_info.flags & SIF_BLK_BE_DOMAIN) )
+ return 0;
+
+ printk(KERN_INFO "Initialising Xen virtual block device\n");
+
+ blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
+ if ( blk_ring_rec == NULL )
+ return -ENOMEM;
+ memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_FE;
+ cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(blkif_fe_driver_status_changed_t);
+ st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+ /*
+ * We should read 'nr_interfaces' from response message and wait
+ * for notifications before proceeding. For now we assume that we
+ * will be notified of exactly one interface.
+ */
+ while ( blkif_state != BLKIF_STATE_CONNECTED )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ return 0;
+#if 0
+ int error;
+
+ reset_xlblk_interface();
+
+ xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
+ xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD);
+
+ error = request_irq(xlblk_response_irq, xlblk_response_int,
+ SA_SAMPLE_RANDOM, "blkdev", NULL);
+ if (error) {
+ printk(KERN_ALERT "Could not allocate receive interrupt\n");
+ goto fail;
+ }
+
+ error = request_irq(xlblk_update_irq, xlblk_update_int,
+ 0, "blkdev", NULL);
+ if (error) {
+ printk(KERN_ALERT
+ "Could not allocate block update interrupt\n");
+ goto fail;
+ }
+
+ (void)xlvbd_init();
+
+ return 0;
+
+ fail:
+ return error;
+#endif
+}
+
+
+static void __exit xlblk_cleanup(void)
+{
+ /* XXX FIXME */
+ BUG();
+#if 0
+ /* xlvbd_cleanup(); */
+ free_irq(xlblk_response_irq, NULL);
+ free_irq(xlblk_update_irq, NULL);
+ unbind_virq_from_irq(VIRQ_BLKDEV);
+ unbind_virq_from_irq(VIRQ_VBD_UPD);
+#endif
+}
+
+
+module_init(xlblk_init);
+module_exit(xlblk_cleanup);
+
+
+void blkdev_suspend(void)
+{
+}
+
+
+void blkdev_resume(void)
+{
+}
--- /dev/null
+/******************************************************************************
+ * block.h
+ *
+ * Shared definitions between all levels of XenLinux Virtual block devices.
+ */
+
+#ifndef __XEN_DRIVERS_BLOCK_H__
+#define __XEN_DRIVERS_BLOCK_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/major.h>
+
+#include <linux/devfs_fs_kernel.h>
+
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include <asm-xen/blkif.h>
+
+#if 0
+#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 0
+#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
+#endif
+
+struct xlbd_type_info {
+ int partn_shift;
+ int devs_per_major;
+ int hardsect_size;
+ int max_sectors;
+ char *name;
+};
+
+/*
+ * We have one of these per vbd major, whether ide, scsi or 'other'. They
+ * hang in private_data off the gendisk structure. We may end up
+ * putting all kinds of interesting stuff here :-)
+ */
+struct xlbd_major_info {
+ int major;
+ int usage;
+ int xd_device;
+ struct xlbd_type_info *type;
+};
+
+struct xlbd_disk_info {
+ int xd_device;
+ struct xlbd_major_info *mi;
+};
+
+typedef struct xen_block {
+ int usage;
+} xen_block_t;
+
+extern struct request_queue *xlbd_blk_queue;
+extern spinlock_t blkif_io_lock;
+
+extern int blkif_open(struct inode *inode, struct file *filep);
+extern int blkif_release(struct inode *inode, struct file *filep);
+extern int blkif_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument);
+extern int blkif_check(dev_t dev);
+extern int blkif_revalidate(dev_t dev);
+extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
+extern void do_blkif_request(request_queue_t *rq);
+
+extern void xlvbd_update_vbds(void);
+
+/* Virtual block-device subsystem. */
+extern int xlvbd_init(void);
+extern void xlvbd_cleanup(void);
+
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
--- /dev/null
+/******************************************************************************
+ * vbd.c
+ *
+ * XenLinux virtual block-device driver (xvd).
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ */
+
+#include "block.h"
+#include <linux/blkdev.h>
+
+/*
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.
+ * potentially combinations of the two) in the naming scheme and in a few
+ * other places (like default readahead, etc).
+ */
+
+#define NUM_IDE_MAJORS 10
+#define NUM_SCSI_MAJORS 9
+#define NUM_VBD_MAJORS 1
+
+static struct xlbd_type_info xlbd_ide_type = {
+ .partn_shift = 6,
+ // XXXcl todo blksize_size[major] = 1024;
+ .hardsect_size = 512,
+ .max_sectors = 128, /* 'hwif->rqsize' if we knew it */
+ // XXXcl todo read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */
+ .name = "hd",
+};
+
+static struct xlbd_type_info xlbd_scsi_type = {
+ .partn_shift = 4,
+ // XXXcl todo blksize_size[major] = 1024; /* XXX 512; */
+ .hardsect_size = 512,
+ .max_sectors = 128*8, /* XXX 128; */
+ // XXXcl todo read_ahead[major] = 0; /* XXX 8; -- guessing */
+ .name = "sd",
+};
+
+static struct xlbd_type_info xlbd_vbd_type = {
+ .partn_shift = 4,
+ // XXXcl todo blksize_size[major] = 512;
+ .hardsect_size = 512,
+ .max_sectors = 128,
+ // XXXcl todo read_ahead[major] = 8;
+ .name = "xvd",
+};
+
+/* XXXcl handle cciss after finding out why it's "hacked" in */
+
+static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
+ NUM_VBD_MAJORS];
+
+/* Information about our VBDs. */
+#define MAX_VBDS 64
+static int nr_vbds;
+static vdisk_t *vbd_info;
+
+struct request_queue *xlbd_blk_queue = NULL;
+
+#define MAJOR_XEN(dev) ((dev)>>8)
+#define MINOR_XEN(dev) ((dev) & 0xff)
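
These macros split the 16-bit Xen device value like a Linux dev_t: major in the high byte, minor in the low byte. A stand-alone example with a made-up device value (IDE0_MAJOR is 3 in linux/major.h):

    #include <assert.h>

    #define MAJOR_XEN(dev) ((dev)>>8)
    #define MINOR_XEN(dev) ((dev) & 0xff)

    int main(void)
    {
        int xd_device = 0x0341;              /* hypothetical probe result   */
        assert(MAJOR_XEN(xd_device) == 3);   /* IDE0_MAJOR: an "hd" unit    */
        assert(MINOR_XEN(xd_device) == 65);  /* second disk, partition 1    */
        return 0;
    }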
+
+static struct block_device_operations xlvbd_block_fops =
+{
+ .owner = THIS_MODULE,
+ .open = blkif_open,
+ .release = blkif_release,
+ .ioctl = blkif_ioctl,
+#if 0
+ check_media_change: blkif_check,
+ revalidate: blkif_revalidate,
+#endif
+};
+
+spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+
+static int xlvbd_get_vbd_info(vdisk_t *disk_info)
+{
+ vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
+ blkif_request_t req;
+ blkif_response_t rsp;
+ int nr;
+
+ if ( buf == NULL )
+ return -ENOMEM;
+
+ memset(&req, 0, sizeof(req));
+ req.operation = BLKIF_OP_PROBE;
+ req.nr_segments = 1;
+ req.frame_and_sects[0] = virt_to_machine(buf) | 7;
+
+ blkif_control_send(&req, &rsp);
+
+ if ( rsp.status <= 0 )
+ {
+ printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
+ free_page((unsigned long)buf);
+ return -1;
+ }
+
+ if ( (nr = rsp.status) > MAX_VBDS )
+ nr = MAX_VBDS;
+ memcpy(disk_info, buf, nr * sizeof(vdisk_t));
+ free_page((unsigned long)buf);
+
+ return nr;
+}
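
The probe request's `| 7` is an instance of the segment encoding used throughout the frontend: a page-aligned machine address in the high bits, the first 512-byte sector of the range in bits 5:3, and the last sector in bits 2:0, so `| 7` asks for sectors 0 through 7, the whole 4KB page. A stand-alone sketch of the packing; the machine address is a made-up example:

    #include <assert.h>

    static unsigned long make_seg(unsigned long ma, unsigned fsect,
                                  unsigned lsect)
    {
        /* ma must be page aligned; sectors are 512-byte units in the page */
        return ma | (fsect << 3) | lsect;
    }

    int main(void)
    {
        unsigned long seg = make_seg(0x12345000UL, 0, 7);  /* whole page */
        assert(((seg >> 3) & 7) == 0);            /* first sector        */
        assert((seg & 7) == 7);                   /* last sector         */
        assert((seg & ~0xfffUL) == 0x12345000UL); /* machine address     */
        return 0;
    }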
+
+static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor)
+{
+ int mi_idx, new_major;
+ int xd_major = MAJOR_XEN(xd_device);
+ int xd_minor = MINOR_XEN(xd_device);
+
+ *minor = xd_minor;
+
+ switch (xd_major) {
+ case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
+ case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
+ case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
+ case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
+ case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
+ case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
+ case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
+ case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
+ case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
+ case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
+ case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
+ case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
+ mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
+ new_major = xd_major;
+ break;
+ case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
+ default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
+ }
+
+ if (major_info[mi_idx])
+ return major_info[mi_idx];
+
+ major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
+ if (major_info[mi_idx] == NULL)
+ return NULL;
+
+ memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
+
+ switch (mi_idx) {
+ case 0 ... (NUM_IDE_MAJORS - 1):
+ major_info[mi_idx]->type = &xlbd_ide_type;
+ break;
+ case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
+ major_info[mi_idx]->type = &xlbd_scsi_type;
+ break;
+ case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
+ (NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
+ major_info[mi_idx]->type = &xlbd_vbd_type;
+ break;
+ }
+ major_info[mi_idx]->major = new_major;
+
+ if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
+ printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
+ major_info[mi_idx]->major, major_info[mi_idx]->type->name);
+ goto out;
+ }
+
+ devfs_mk_dir(major_info[mi_idx]->type->name);
+
+ return major_info[mi_idx];
+
+ out:
+ kfree(major_info[mi_idx]);
+ major_info[mi_idx] = NULL;
+ return NULL;
+}
+
+static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi,
+ int xd_minor, vdisk_t *xd)
+{
+ struct gendisk *gd;
+ struct xlbd_disk_info *di;
+ int device, partno;
+
+ device = MKDEV(mi->major, xd_minor);
+ gd = get_gendisk(device, &partno);
+ if (gd)
+ return gd;
+
+ di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
+ if (di == NULL)
+ return NULL;
+ di->mi = mi;
+ di->xd_device = xd->device;
+
+ /* Construct an appropriate gendisk structure. */
+ gd = alloc_disk(1);
+ if (gd == NULL)
+ goto out;
+
+ gd->major = mi->major;
+ gd->first_minor = xd_minor;
+ gd->fops = &xlvbd_block_fops;
+ gd->private_data = di;
+ sprintf(gd->disk_name, "%s%c%d", mi->type->name,
+ 'a' + (xd_minor >> mi->type->partn_shift),
+ xd_minor & ((1 << mi->type->partn_shift) - 1));
+ /* sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */
+
+ set_capacity(gd, xd->capacity);
+
+ if (xlbd_blk_queue == NULL) {
+ xlbd_blk_queue = blk_init_queue(do_blkif_request,
+ &blkif_io_lock);
+ if (xlbd_blk_queue == NULL)
+ goto out;
+ elevator_init(xlbd_blk_queue, &elevator_noop);
+
+ /*
+ * Turn off barking 'headactive' mode. We dequeue
+ * buffer heads as soon as we pass them to the
+ * back-end driver.
+ */
+ blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */
+
+ blk_queue_hardsect_size(xlbd_blk_queue,
+ mi->type->hardsect_size);
+ blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */
+
+ /* XXXcl: set the mask to PAGE_SIZE for now; to improve on this, either use
+ - blk_queue_merge_bvec to merge requests with adjacent ma's
+ - the tags infrastructure
+ - the dma infrastructure
+ */
+ blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
+
+ blk_queue_max_phys_segments(xlbd_blk_queue,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blk_queue_max_hw_segments(xlbd_blk_queue,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */
+
+ }
+ gd->queue = xlbd_blk_queue;
+
+ add_disk(gd);
+
+ return gd;
+
+ out:
+ if (gd)
+ put_disk(gd); /* never added via add_disk(), so just drop the ref */
+ kfree(di);
+ return NULL;
+}
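
The disk_name computation above maps minors to the familiar hdXN/sdXN names using partn_shift (6 for IDE gives 64 minors per unit; 4 for SCSI gives 16). A stand-alone example with a made-up minor:

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char name[32];
        int partn_shift = 6;     /* IDE: 64 minors per unit                */
        int xd_minor = 65;       /* hypothetical: 2nd disk, 1st partition  */

        sprintf(name, "%s%c%d", "hd",
                'a' + (xd_minor >> partn_shift),
                xd_minor & ((1 << partn_shift) - 1));
        assert(strcmp(name, "hdb1") == 0);
        return 0;
    }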
+
+/*
+ * xlvbd_init_device - initialise a VBD device
+ * @xd: a vdisk_t describing the VBD
+ *
+ * Takes a vdisk_t * that describes a VBD the domain has access to.
+ * Performs appropriate initialisation and registration of the device.
+ *
+ * Care needs to be taken when making re-entrant calls to ensure that
+ * corruption does not occur. Also, devices that are in use should not have
+ * their details updated. This is the caller's responsibility.
+ */
+static int xlvbd_init_device(vdisk_t *xd)
+{
+ struct block_device *bd;
+ struct gendisk *gd;
+ struct xlbd_major_info *mi;
+ int device;
+ int minor;
+
+ int err = -ENOMEM;
+
+ mi = xlbd_get_major_info(xd->device, &minor);
+ if (mi == NULL)
+ return -EPERM;
+
+ device = MKDEV(mi->major, minor);
+
+ if ((bd = bdget(device)) == NULL)
+ return -EPERM;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ gd = xlvbd_get_gendisk(mi, minor, xd);
+ if (gd == NULL) {
+ err = -EPERM;
+ goto out;
+ }
+
+ if (VDISK_READONLY(xd->info))
+ set_disk_ro(gd, 1);
+
+ /* Some final fix-ups depending on the device type */
+ switch (VDISK_TYPE(xd->info)) {
+ case VDISK_TYPE_CDROM:
+ gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD;
+ /* FALLTHROUGH */
+ case VDISK_TYPE_FLOPPY:
+ case VDISK_TYPE_TAPE:
+ gd->flags |= GENHD_FL_REMOVABLE;
+ break;
+
+ case VDISK_TYPE_DISK:
+ break;
+
+ default:
+ printk(KERN_ALERT "XenLinux: unknown device type %d\n",
+ VDISK_TYPE(xd->info));
+ break;
+ }
+
+ err = 0;
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return err;
+}
+
+#if 0
+/*
+ * xlvbd_remove_device - remove a device node if possible
+ * @device: numeric device ID
+ *
+ * Updates the gendisk structure and invalidates devices.
+ *
+ * This is OK for now but in future, should perhaps consider where this should
+ * deallocate gendisks / unregister devices.
+ */
+static int xlvbd_remove_device(int device)
+{
+ int i, rc = 0, minor = MINOR(device);
+ struct gendisk *gd;
+ struct block_device *bd;
+ xen_block_t *disk = NULL;
+
+ if ( (bd = bdget(device)) == NULL )
+ return -1;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((gd = get_gendisk(device)) == NULL) ||
+ ((disk = xldev_to_xldisk(device)) == NULL) )
+ BUG();
+
+ if ( disk->usage != 0 )
+ {
+ printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
+ rc = -1;
+ goto out;
+ }
+
+ if ( (minor & (gd->max_p-1)) != 0 )
+ {
+ /* 1: The VBD is mapped to a partition rather than a whole unit. */
+ invalidate_device(device, 1);
+ gd->part[minor].start_sect = 0;
+ gd->part[minor].nr_sects = 0;
+ gd->sizes[minor] = 0;
+
+ /* Clear the consists-of-virtual-partitions flag if possible. */
+ gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+ for ( i = 1; i < gd->max_p; i++ )
+ if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
+ gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+
+ /*
+ * If all virtual partitions are now gone, and a 'whole unit' VBD is
+ * present, then we can try to grok the unit's real partition table.
+ */
+ if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+ (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
+ !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
+ {
+ register_disk(gd,
+ device&~(gd->max_p-1),
+ gd->max_p,
+ &xlvbd_block_fops,
+ gd->part[minor&~(gd->max_p-1)].nr_sects);
+ }
+ }
+ else
+ {
+ /*
+ * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
+ * NB. The partition entries are only cleared if there are no VBDs
+ * mapped to individual partitions on this unit.
+ */
+ i = gd->max_p - 1; /* Default: clear subpartitions as well. */
+ if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+ i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
+ while ( i >= 0 )
+ {
+ invalidate_device(device+i, 1);
+ gd->part[minor+i].start_sect = 0;
+ gd->part[minor+i].nr_sects = 0;
+ gd->sizes[minor+i] = 0;
+ i--;
+ }
+ }
+
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+
+/*
+ * xlvbd_update_vbds - reprobes the VBD status and updates driver state
+ * accordingly. The VBDs need to be updated in this way when the domain is
+ * initialised and also each time we receive an XLBLK_UPDATE event.
+ */
+void xlvbd_update_vbds(void)
+{
+ int i, j, k, old_nr, new_nr;
+ vdisk_t *old_info, *new_info, *merged_info;
+
+ old_info = vbd_info;
+ old_nr = nr_vbds;
+
+ new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
+ if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
+ {
+ kfree(new_info);
+ return;
+ }
+
+ /*
+ * Final list maximum size is old list + new list. This occurs only when
+ * old list and new list do not overlap at all, and we cannot yet destroy
+ * VBDs in the old list because the usage counts are busy.
+ */
+ merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
+
+ /* @i tracks old list; @j tracks new list; @k tracks merged list. */
+ i = j = k = 0;
+
+ while ( (i < old_nr) && (j < new_nr) )
+ {
+ if ( old_info[i].device < new_info[j].device )
+ {
+ if ( xlvbd_remove_device(old_info[i].device) != 0 )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+ i++;
+ }
+ else if ( old_info[i].device > new_info[j].device )
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+ j++;
+ }
+ else
+ {
+ if ( ((old_info[i].capacity == new_info[j].capacity) &&
+ (old_info[i].info == new_info[j].info)) ||
+ (xlvbd_remove_device(old_info[i].device) != 0) )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+ else if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+ i++; j++;
+ }
+ }
+
+ for ( ; i < old_nr; i++ )
+ {
+ if ( xlvbd_remove_device(old_info[i].device) != 0 )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+ }
+
+ for ( ; j < new_nr; j++ )
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+ }
+
+ vbd_info = merged_info;
+ nr_vbds = k;
+
+ kfree(old_info);
+ kfree(new_info);
+}
+#endif
+
+/*
+ * Set up all the linux device goop for the virtual block devices
+ * (vbd's) that we know about. Note that although from the backend
+ * driver's p.o.v. VBDs are addressed simply as an opaque 16-bit device
+ * number, the domain creation tools conventionally allocate these
+ * numbers to correspond to those used by 'real' linux -- this is just
+ * for convenience as it means e.g. that the same /etc/fstab can be
+ * used when booting with or without Xen.
+ */
+int xlvbd_init(void)
+{
+ int i;
+
+ /*
+ * If compiled as a module, we don't support unloading yet. We
+ * therefore permanently increment the reference count to
+ * disallow it.
+ */
+ MOD_INC_USE_COUNT;
+
+ memset(major_info, 0, sizeof(major_info));
+
+
+ vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
+ if (vbd_info == NULL)
+ return -ENOMEM;
+ nr_vbds = xlvbd_get_vbd_info(vbd_info);
+
+ if (nr_vbds < 0) {
+ kfree(vbd_info);
+ vbd_info = NULL;
+ nr_vbds = 0;
+ } else {
+ for (i = 0; i < nr_vbds; i++)
+ xlvbd_init_device(&vbd_info[i]);
+ }
+
+ return 0;
+}
+++ /dev/null
-
-config XENBLOCK
- tristate "Block device driver"
- depends on ARCH_XEN
- help
- Block device driver for Xen
+++ /dev/null
-
-obj-y := vbd.o block.o
-
+++ /dev/null
-/******************************************************************************
- * block.c
- *
- * XenLinux virtual block-device driver.
- *
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- * Copyright (c) 2004, Christian Limpach
- */
-
-#include "block.h"
-#include <linux/cdrom.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <scsi/scsi.h>
-#include <asm-xen/ctrl_if.h>
-
-typedef unsigned char byte; /* from linux/ide.h */
-
-#define BLKIF_STATE_CLOSED 0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED 2
-static unsigned int blkif_state = BLKIF_STATE_CLOSED;
-static unsigned int blkif_evtchn, blkif_irq;
-
-static int blkif_control_rsp_valid;
-static blkif_response_t blkif_control_rsp;
-
-static blkif_ring_t *blk_ring;
-static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
-static BLKIF_RING_IDX req_prod; /* Private request producer. */
-
-static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for
- * recovery. Responses not stored here. */
-static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
- * recovery */
-static int recovery = 0; /* "Recovery in progress" flag. Protected
- * by the blkif_io_lock */
-
-/* We plug the I/O ring if the driver is suspended or if the ring is full. */
-#define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
- (blkif_state != BLKIF_STATE_CONNECTED))
-
-/*
- * Request queues with outstanding work, but ring is currently full.
- * We need no special lock here, as we always access this with the
- * blkif_io_lock held. We only need a small maximum list.
- */
-#define MAX_PENDING 8
-static request_queue_t *pending_queues[MAX_PENDING];
-static int nr_pending;
-
-static inline void flush_requests(void)
-{
-
- blk_ring->req_prod = req_prod;
-
- notify_via_evtchn(blkif_evtchn);
-}
-
-
-#if 0
-/*
- * blkif_update_int/update-vbds_task - handle VBD update events.
- * Schedule a task for keventd to run, which will update the VBDs and perform
- * the corresponding updates to our view of VBD state.
- */
-static struct tq_struct update_tq;
-static void update_vbds_task(void *unused)
-{
- xlvbd_update_vbds();
-}
-#endif
-
-
-int blkif_open(struct inode *inode, struct file *filep)
-{
- struct gendisk *gd = inode->i_bdev->bd_disk;
- struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
- /* Update of usage count is protected by per-device semaphore. */
- di->mi->usage++;
-
- return 0;
-}
-
-
-int blkif_release(struct inode *inode, struct file *filep)
-{
- struct gendisk *gd = inode->i_bdev->bd_disk;
- struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
- /*
- * When usage drops to zero it may allow more VBD updates to occur.
- * Update of usage count is protected by a per-device semaphore.
- */
- if (--di->mi->usage == 0) {
-#if 0
- update_tq.routine = update_vbds_task;
- schedule_task(&update_tq);
-#endif
- }
-
- return 0;
-}
-
-
-int blkif_ioctl(struct inode *inode, struct file *filep,
- unsigned command, unsigned long argument)
-{
- /* struct gendisk *gd = inode->i_bdev->bd_disk; */
-
- DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long)argument, inode->i_rdev);
-
- switch (command) {
-
- case HDIO_GETGEO:
- /* return ENOSYS to use defaults */
- return -ENOSYS;
-
- default:
- printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
- command);
- return -ENOSYS;
- }
-
- return 0;
-}
-
-#if 0
-/* check media change: should probably do something here in some cases :-) */
-int blkif_check(kdev_t dev)
-{
- DPRINTK("blkif_check\n");
- return 0;
-}
-
-int blkif_revalidate(kdev_t dev)
-{
- struct block_device *bd;
- struct gendisk *gd;
- xen_block_t *disk;
- unsigned long capacity;
- int i, rc = 0;
-
- if ( (bd = bdget(dev)) == NULL )
- return -EINVAL;
-
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
- if ( ((gd = get_gendisk(dev)) == NULL) ||
- ((disk = xldev_to_xldisk(dev)) == NULL) ||
- ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
- {
- rc = -EINVAL;
- goto out;
- }
-
- if ( disk->usage > 1 )
- {
- rc = -EBUSY;
- goto out;
- }
-
- /* Only reread partition table if VBDs aren't mapped to partitions. */
- if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
- {
- for ( i = gd->max_p - 1; i >= 0; i-- )
- {
- invalidate_device(dev+i, 1);
- gd->part[MINOR(dev+i)].start_sect = 0;
- gd->part[MINOR(dev+i)].nr_sects = 0;
- gd->sizes[MINOR(dev+i)] = 0;
- }
-
- grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
- }
-
- out:
- up(&bd->bd_sem);
- bdput(bd);
- return rc;
-}
-#endif
-
-
-/*
- * blkif_queue_request
- *
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- * virtual address in the guest os.
- */
-static int blkif_queue_request(struct request *req)
-{
- struct xlbd_disk_info *di =
- (struct xlbd_disk_info *)req->rq_disk->private_data;
- unsigned long buffer_ma;
- blkif_request_t *ring_req;
- struct bio *bio;
- struct bio_vec *bvec;
- int idx, s;
- unsigned int fsect, lsect;
-
- if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
- return 1;
-
- /* Fill out a communications ring structure. */
- ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
- ring_req->id = (unsigned long)req;
- ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
- BLKIF_OP_READ;
- ring_req->sector_number = (blkif_sector_t)req->sector;
- ring_req->device = di->xd_device;
-
- s = 0;
- ring_req->nr_segments = 0;
- rq_for_each_bio(bio, req) {
- bio_for_each_segment(bvec, bio, idx) {
- buffer_ma =
- phys_to_machine(page_to_phys(bvec->bv_page));
- if (unlikely((buffer_ma & ((1<<9)-1)) != 0))
- BUG();
-
- fsect = bvec->bv_offset >> 9;
- lsect = fsect + (bvec->bv_len >> 9) - 1;
- if (unlikely(lsect > 7))
- BUG();
-
- ring_req->frame_and_sects[ring_req->nr_segments++] =
- buffer_ma | (fsect << 3) | lsect;
- s += bvec->bv_len >> 9;
- }
- }
-
- req_prod++;
-
- /* Keep a private copy so we can reissue requests when recovering. */
- blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req =
- *ring_req;
- blk_ring_rec->req_prod++;
-
- return 0;
-}
-
-/*
- * do_blkif_request
- * read a block; request is in a request queue
- */
-void do_blkif_request(request_queue_t *rq)
-{
- struct request *req;
- int queued;
-
- DPRINTK("Entered do_blkif_request\n");
-
- queued = 0;
-
- while ((req = elv_next_request(rq)) != NULL) {
- if (!blk_fs_request(req)) {
- end_request(req, 0);
- continue;
- }
-
- if (BLKIF_RING_FULL) {
- blk_stop_queue(rq);
- break;
- }
- DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
- req, req->cmd, req->sector, req->current_nr_sectors,
- req->nr_sectors, req->buffer,
- rq_data_dir(req) ? "write" : "read");
- blkdev_dequeue_request(req);
- if (blkif_queue_request(req)) {
- blk_stop_queue(rq);
- break;
- }
- queued++;
- }
-
- if (queued != 0)
- flush_requests();
-}
-
-
-static void kick_pending_request_queues(void)
-{
- /* We kick pending request queues if the ring is reasonably empty. */
- if ( (nr_pending != 0) &&
- ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
- {
- /* Attempt to drain the queue, but bail if the ring becomes full. */
- while ( (nr_pending != 0) && !BLKIF_RING_FULL )
- do_blkif_request(pending_queues[--nr_pending]);
- }
-}
-
-
-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- struct request *req;
- blkif_response_t *bret;
- BLKIF_RING_IDX i;
- unsigned long flags;
-
- spin_lock_irqsave(&blkif_io_lock, flags);
-
- if (unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery)) {
- printk("Bailed out\n");
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
- return IRQ_HANDLED;
- }
-
- for (i = resp_cons; i != blk_ring->resp_prod; i++) {
- bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
- switch (bret->operation) {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- if (unlikely(bret->status != BLKIF_RSP_OKAY))
- DPRINTK("Bad return from blkdev data request: %lx\n",
- bret->status);
- req = (struct request *)bret->id;
- /* XXXcl pass up status */
- if (unlikely(end_that_request_first(req, 1,
- req->hard_nr_sectors)))
- BUG();
-
- end_that_request_last(req);
- break;
- case BLKIF_OP_PROBE:
- memcpy(&blkif_control_rsp, bret, sizeof(*bret));
- blkif_control_rsp_valid = 1;
- break;
- default:
- BUG();
- }
- }
-
- resp_cons = i;
- resp_cons_rec = i;
-
- if (xlbd_blk_queue &&
- test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags)) {
- blk_start_queue(xlbd_blk_queue);
- /* XXXcl call to request_fn should not be needed but
- * we get stuck without... needs investigating
- */
- xlbd_blk_queue->request_fn(xlbd_blk_queue);
- }
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- return IRQ_HANDLED;
-}
-
-
-void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
-{
- unsigned long flags;
-
- retry:
- while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- spin_lock_irqsave(&blkif_io_lock, flags);
- if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
- {
- spin_unlock_irqrestore(&blkif_io_lock, flags);
- goto retry;
- }
-
- memcpy(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req, req, sizeof(*req));
- memcpy(&blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,
- req, sizeof(*req));
- req_prod++;
- flush_requests();
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- while ( !blkif_control_rsp_valid )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
- blkif_control_rsp_valid = 0;
-}
-
-
-static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
-{
- ctrl_msg_t cmsg;
- blkif_fe_interface_connect_t up;
-
- if ( status->handle != 0 )
- {
- printk(KERN_WARNING "Status change on unsupported blkif %d\n",
- status->handle);
- return;
- }
-
- switch ( status->status )
- {
- case BLKIF_INTERFACE_STATUS_DESTROYED:
- printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
- blkif_state);
- break;
-
- case BLKIF_INTERFACE_STATUS_DISCONNECTED:
- if ( blkif_state != BLKIF_STATE_CLOSED )
- {
- printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
- " in state %d\n", blkif_state);
-
- printk(KERN_INFO "VBD driver recovery in progress\n");
-
- /* Prevent new requests being issued until we fix things up. */
- spin_lock_irq(&blkif_io_lock);
- recovery = 1;
- blkif_state = BLKIF_STATE_DISCONNECTED;
- spin_unlock_irq(&blkif_io_lock);
-
- /* Free resources associated with old device channel. */
- free_page((unsigned long)blk_ring);
- free_irq(blkif_irq, NULL);
- unbind_evtchn_from_irq(blkif_evtchn);
- }
-
- /* Move from CLOSED to DISCONNECTED state. */
- blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
- blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
- blkif_state = BLKIF_STATE_DISCONNECTED;
-
- /* Construct an interface-CONNECT message for the domain controller. */
- cmsg.type = CMSG_BLKIF_FE;
- cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT;
- cmsg.length = sizeof(blkif_fe_interface_connect_t);
- up.handle = 0;
- up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
- memcpy(cmsg.msg, &up, sizeof(up));
-
- /* Tell the controller to bring up the interface. */
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
- break;
-
- case BLKIF_INTERFACE_STATUS_CONNECTED:
- if ( blkif_state == BLKIF_STATE_CLOSED )
- {
- printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
- " in state %d\n", blkif_state);
- break;
- }
-
- blkif_evtchn = status->evtchn;
- blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
- (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
-
- if ( recovery )
- {
- int i;
-
- /* Shouldn't need the blkif_io_lock here - the device is
- * plugged and the recovery flag prevents the interrupt handler
- * changing anything. */
-
- /* Reissue requests from the private block ring. */
- for ( i = 0;
- resp_cons_rec < blk_ring_rec->req_prod;
- resp_cons_rec++, i++ )
- {
- blk_ring->ring[i].req
- = blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req;
- }
-
- /* Reset the private block ring to match the new ring. */
- memcpy(blk_ring_rec, blk_ring, sizeof(*blk_ring));
- resp_cons_rec = 0;
-
- /* blk_ring->req_prod will be set when we flush_requests().*/
- blk_ring_rec->req_prod = req_prod = i;
-
- wmb();
-
- /* Switch off recovery mode, using a memory barrier to ensure that
- * it's seen before we flush requests - we don't want to miss any
- * interrupts. */
- recovery = 0;
- wmb();
-
- /* Kicks things back into life. */
- flush_requests();
- }
- else
- {
- /* Probe for discs that are attached to the interface. */
- xlvbd_init();
- }
-
- blkif_state = BLKIF_STATE_CONNECTED;
-
- /* Kick pending requests. */
- spin_lock_irq(&blkif_io_lock);
- kick_pending_request_queues();
- spin_unlock_irq(&blkif_io_lock);
-
- break;
-
- default:
- printk(KERN_WARNING "Status change to unknown value %d\n",
- status->status);
- break;
- }
-}
-
-
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->subtype )
- {
- case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
- if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
- goto parse_error;
- blkif_status_change((blkif_fe_interface_status_changed_t *)
- &msg->msg[0]);
- break;
-#if 0
- case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
- update_tq.routine = update_vbds_task;
- schedule_task(&update_tq);
- break;
-#endif
- default:
- goto parse_error;
- }
-
- ctrl_if_send_response(msg);
- return;
-
- parse_error:
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-
-int __init xlblk_init(void)
-{
- ctrl_msg_t cmsg;
- blkif_fe_driver_status_changed_t st;
-
- if ( (start_info.flags & SIF_INITDOMAIN)
- || (start_info.flags & SIF_BLK_BE_DOMAIN) )
- return 0;
-
- printk(KERN_INFO "Initialising Xen virtual block device\n");
-
- blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
- memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));
-
- (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_BLKIF_FE;
- cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
- cmsg.length = sizeof(blkif_fe_driver_status_changed_t);
- st.status = BLKIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
- /*
- * We should read 'nr_interfaces' from response message and wait
- * for notifications before proceeding. For now we assume that we
- * will be notified of exactly one interface.
- */
- while ( blkif_state != BLKIF_STATE_CONNECTED )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- return 0;
-#if 0
- int error;
-
- reset_xlblk_interface();
-
- xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
- xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD);
-
- error = request_irq(xlblk_response_irq, xlblk_response_int,
- SA_SAMPLE_RANDOM, "blkdev", NULL);
- if (error) {
- printk(KERN_ALERT "Could not allocate receive interrupt\n");
- goto fail;
- }
-
- error = request_irq(xlblk_update_irq, xlblk_update_int,
- 0, "blkdev", NULL);
- if (error) {
- printk(KERN_ALERT
- "Could not allocate block update interrupt\n");
- goto fail;
- }
-
- (void)xlvbd_init();
-
- return 0;
-
- fail:
- return error;
-#endif
-}
-
-
-static void __exit xlblk_cleanup(void)
-{
- /* XXX FIXME */
- BUG();
-#if 0
- /* xlvbd_cleanup(); */
- free_irq(xlblk_response_irq, NULL);
- free_irq(xlblk_update_irq, NULL);
- unbind_virq_from_irq(VIRQ_BLKDEV);
- unbind_virq_from_irq(VIRQ_VBD_UPD);
-#endif
-}
-
-
-module_init(xlblk_init);
-module_exit(xlblk_cleanup);
-
-
-void blkdev_suspend(void)
-{
-}
-
-
-void blkdev_resume(void)
-{
-}
+++ /dev/null
-/******************************************************************************
- * block.h
- *
- * Shared definitions between all levels of XenLinux Virtual block devices.
- */
-
-#ifndef __XEN_DRIVERS_BLOCK_H__
-#define __XEN_DRIVERS_BLOCK_H__
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-
-#include <linux/fs.h>
-#include <linux/hdreg.h>
-#include <linux/blkdev.h>
-#include <linux/major.h>
-
-#include <linux/devfs_fs_kernel.h>
-
-#include <asm/hypervisor-ifs/hypervisor-if.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
-
-#include <asm-xen/blkif.h>
-
-#if 0
-#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
-#else
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#if 0
-#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
-#else
-#define DPRINTK_IOCTL(_f, _a...) ((void)0)
-#endif
-
-struct xlbd_type_info {
- int partn_shift;
- int devs_per_major;
- int hardsect_size;
- int max_sectors;
- char *name;
-};
-
-/*
- * We have one of these per vbd, whether ide, scsi or 'other'. They
- * hang in private_data off the gendisk structure. We may end up
- * putting all kinds of interesting stuff here :-)
- */
-struct xlbd_major_info {
- int major;
- int usage;
- int xd_device;
- struct xlbd_type_info *type;
-};
-
-struct xlbd_disk_info {
- int xd_device;
- struct xlbd_major_info *mi;
-};
-
-typedef struct xen_block {
- int usage;
-} xen_block_t;
-
-extern struct request_queue *xlbd_blk_queue;
-extern spinlock_t blkif_io_lock;
-
-extern int blkif_open(struct inode *inode, struct file *filep);
-extern int blkif_release(struct inode *inode, struct file *filep);
-extern int blkif_ioctl(struct inode *inode, struct file *filep,
- unsigned command, unsigned long argument);
-extern int blkif_check(dev_t dev);
-extern int blkif_revalidate(dev_t dev);
-extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
-extern void do_blkif_request (request_queue_t *rq);
-
-extern void xlvbd_update_vbds(void);
-
-/* Virtual block-device subsystem. */
-extern int xlvbd_init(void);
-extern void xlvbd_cleanup(void);
-
-#endif /* __XEN_DRIVERS_BLOCK_H__ */
+++ /dev/null
-/******************************************************************************
- * vbd.c
- *
- * XenLinux virtual block-device driver (xvd).
- *
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- * Copyright (c) 2004, Christian Limpach
- */
-
-#include "block.h"
-#include <linux/blkdev.h>
-
-/*
- * For convenience we distinguish between ide, scsi and 'other' (i.e.
- * potentially combinations of the two) in the naming scheme and in a few
- * other places (like default readahead, etc).
- */
-
-#define NUM_IDE_MAJORS 10
-#define NUM_SCSI_MAJORS 9
-#define NUM_VBD_MAJORS 1
-
-static struct xlbd_type_info xlbd_ide_type = {
- .partn_shift = 6,
- // XXXcl todo blksize_size[major] = 1024;
- .hardsect_size = 512,
- .max_sectors = 128, /* 'hwif->rqsize' if we knew it */
- // XXXcl todo read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */
- .name = "hd",
-};
-
-static struct xlbd_type_info xlbd_scsi_type = {
- .partn_shift = 4,
- // XXXcl todo blksize_size[major] = 1024; /* XXX 512; */
- .hardsect_size = 512,
- .max_sectors = 128*8, /* XXX 128; */
- // XXXcl todo read_ahead[major] = 0; /* XXX 8; -- guessing */
- .name = "sd",
-};
-
-static struct xlbd_type_info xlbd_vbd_type = {
- .partn_shift = 4,
- // XXXcl todo blksize_size[major] = 512;
- .hardsect_size = 512,
- .max_sectors = 128,
- // XXXcl todo read_ahead[major] = 8;
- .name = "xvd",
-};
-
-/* XXXcl handle cciss after finding out why it's "hacked" in */
-
-static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
- NUM_VBD_MAJORS];
-
-/* Information about our VBDs. */
-#define MAX_VBDS 64
-static int nr_vbds;
-static vdisk_t *vbd_info;
-
-struct request_queue *xlbd_blk_queue = NULL;
-
-#define MAJOR_XEN(dev) ((dev)>>8)
-#define MINOR_XEN(dev) ((dev) & 0xff)
-
-static struct block_device_operations xlvbd_block_fops =
-{
- .owner = THIS_MODULE,
- .open = blkif_open,
- .release = blkif_release,
- .ioctl = blkif_ioctl,
-#if 0
- check_media_change: blkif_check,
- revalidate: blkif_revalidate,
-#endif
-};
-
-spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
-
-static int xlvbd_get_vbd_info(vdisk_t *disk_info)
-{
- vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
- blkif_request_t req;
- blkif_response_t rsp;
- int nr;
-
- memset(&req, 0, sizeof(req));
- req.operation = BLKIF_OP_PROBE;
- req.nr_segments = 1;
- req.frame_and_sects[0] = virt_to_machine(buf) | 7;
-
- blkif_control_send(&req, &rsp);
-
- if ( rsp.status <= 0 )
- {
- printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
- return -1;
- }
-
- if ( (nr = rsp.status) > MAX_VBDS )
- nr = MAX_VBDS;
- memcpy(disk_info, buf, nr * sizeof(vdisk_t));
-
- return nr;
-}
-
-static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor)
-{
- int mi_idx, new_major;
- int xd_major = MAJOR_XEN(xd_device);
- int xd_minor = MINOR_XEN(xd_device);
-
- *minor = xd_minor;
-
- switch (xd_major) {
- case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
- case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
- case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
- case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
- case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
- case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
- case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
- case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
- case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
- case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
- case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
- case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
- mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
- new_major = SCSI_DISK1_MAJOR + xd_major - SCSI_DISK1_MAJOR;
- break;
- case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
- default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
- }
-
- if (major_info[mi_idx])
- return major_info[mi_idx];
-
- major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
- if (major_info[mi_idx] == NULL)
- return NULL;
-
- memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
-
- switch (mi_idx) {
- case 0 ... (NUM_IDE_MAJORS - 1):
- major_info[mi_idx]->type = &xlbd_ide_type;
- break;
- case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
- major_info[mi_idx]->type = &xlbd_scsi_type;
- break;
- case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
- (NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
- major_info[mi_idx]->type = &xlbd_vbd_type;
- break;
- }
- major_info[mi_idx]->major = new_major;
-
- if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
- printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
- major_info[mi_idx]->major, major_info[mi_idx]->type->name);
- goto out;
- }
-
- devfs_mk_dir(major_info[mi_idx]->type->name);
-
- return major_info[mi_idx];
-
- out:
- kfree(major_info[mi_idx]);
- major_info[mi_idx] = NULL;
- return NULL;
-}
-
-static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi,
- int xd_minor, vdisk_t *xd)
-{
- struct gendisk *gd;
- struct xlbd_disk_info *di;
- int device, partno;
-
- device = MKDEV(mi->major, xd_minor);
- gd = get_gendisk(device, &partno);
- if (gd)
- return gd;
-
- di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
- if (di == NULL)
- return NULL;
- di->mi = mi;
- di->xd_device = xd->device;
-
- /* Construct an appropriate gendisk structure. */
- gd = alloc_disk(1);
- if (gd == NULL)
- goto out;
-
- gd->major = mi->major;
- gd->first_minor = xd_minor;
- gd->fops = &xlvbd_block_fops;
- gd->private_data = di;
- sprintf(gd->disk_name, "%s%c%d", mi->type->name,
- 'a' + (xd_minor >> mi->type->partn_shift),
- xd_minor & ((1 << mi->type->partn_shift) - 1));
- /* sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */
-
- set_capacity(gd, xd->capacity);
-
- if (xlbd_blk_queue == NULL) {
- xlbd_blk_queue = blk_init_queue(do_blkif_request,
- &blkif_io_lock);
- if (xlbd_blk_queue == NULL)
- goto out;
- elevator_init(xlbd_blk_queue, &elevator_noop);
-
- /*
- * Turn off barking 'headactive' mode. We dequeue
- * buffer heads as soon as we pass them to back-end
- * driver.
- */
- blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */
-
- blk_queue_hardsect_size(xlbd_blk_queue,
- mi->type->hardsect_size);
- blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */
-
- /* XXXcl: set mask to PAGE_SIZE for now, to improve either use
- - blk_queue_merge_bvec to merge requests with adjacent ma's
- - the tags infrastructure
- - the dma infrastructure
- */
- blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
-
- blk_queue_max_phys_segments(xlbd_blk_queue,
- BLKIF_MAX_SEGMENTS_PER_REQUEST);
- blk_queue_max_hw_segments(xlbd_blk_queue,
- BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */
-
-
- }
- gd->queue = xlbd_blk_queue;
-
- add_disk(gd);
-
- return gd;
-
- out:
- if (gd)
- del_gendisk(gd);
- kfree(di);
- return NULL;
-}
-
-/*
- * xlvbd_init_device - initialise a VBD device
- * @disk: a vdisk_t describing the VBD
- *
- * Takes a vdisk_t * that describes a VBD the domain has access to.
- * Performs appropriate initialisation and registration of the device.
- *
- * Care needs to be taken when making re-entrant calls to ensure that
- * corruption does not occur. Also, devices that are in use should not have
- * their details updated. This is the caller's responsibility.
- */
-static int xlvbd_init_device(vdisk_t *xd)
-{
- struct block_device *bd;
- struct gendisk *gd;
- struct xlbd_major_info *mi;
- int device;
- int minor;
-
- int err = -ENOMEM;
-
- mi = xlbd_get_major_info(xd->device, &minor);
- if (mi == NULL)
- return -EPERM;
-
- device = MKDEV(mi->major, minor);
-
- if ((bd = bdget(device)) == NULL)
- return -EPERM;
-
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
- gd = xlvbd_get_gendisk(mi, minor, xd);
- if (mi == NULL) {
- err = -EPERM;
- goto out;
- }
-
- if (VDISK_READONLY(xd->info))
- set_disk_ro(gd, 1);
-
- /* Some final fix-ups depending on the device type */
- switch (VDISK_TYPE(xd->info)) {
- case VDISK_TYPE_CDROM:
- gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD;
- /* FALLTHROUGH */
- case VDISK_TYPE_FLOPPY:
- case VDISK_TYPE_TAPE:
- gd->flags |= GENHD_FL_REMOVABLE;
- break;
-
- case VDISK_TYPE_DISK:
- break;
-
- default:
- printk(KERN_ALERT "XenLinux: unknown device type %d\n",
- VDISK_TYPE(xd->info));
- break;
- }
-
- err = 0;
- out:
- up(&bd->bd_sem);
- bdput(bd);
- return err;
-}
-
-#if 0
-/*
- * xlvbd_remove_device - remove a device node if possible
- * @device: numeric device ID
- *
- * Updates the gendisk structure and invalidates devices.
- *
- * This is OK for now but in future, should perhaps consider where this should
- * deallocate gendisks / unregister devices.
- */
-static int xlvbd_remove_device(int device)
-{
- int i, rc = 0, minor = MINOR(device);
- struct gendisk *gd;
- struct block_device *bd;
- xen_block_t *disk = NULL;
-
- if ( (bd = bdget(device)) == NULL )
- return -1;
-
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
- if ( ((gd = get_gendisk(device)) == NULL) ||
- ((disk = xldev_to_xldisk(device)) == NULL) )
- BUG();
-
- if ( disk->usage != 0 )
- {
- printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
- rc = -1;
- goto out;
- }
-
- if ( (minor & (gd->max_p-1)) != 0 )
- {
- /* 1: The VBD is mapped to a partition rather than a whole unit. */
- invalidate_device(device, 1);
- gd->part[minor].start_sect = 0;
- gd->part[minor].nr_sects = 0;
- gd->sizes[minor] = 0;
-
- /* Clear the consists-of-virtual-partitions flag if possible. */
- gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
- for ( i = 1; i < gd->max_p; i++ )
- if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
- gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
-
- /*
- * If all virtual partitions are now gone, and a 'whole unit' VBD is
- * present, then we can try to grok the unit's real partition table.
- */
- if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
- (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
- !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
- {
- register_disk(gd,
- device&~(gd->max_p-1),
- gd->max_p,
- &xlvbd_block_fops,
- gd->part[minor&~(gd->max_p-1)].nr_sects);
- }
- }
- else
- {
- /*
- * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
- * NB. The partition entries are only cleared if there are no VBDs
- * mapped to individual partitions on this unit.
- */
- i = gd->max_p - 1; /* Default: clear subpartitions as well. */
- if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
- i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
- while ( i >= 0 )
- {
- invalidate_device(device+i, 1);
- gd->part[minor+i].start_sect = 0;
- gd->part[minor+i].nr_sects = 0;
- gd->sizes[minor+i] = 0;
- i--;
- }
- }
-
- out:
- up(&bd->bd_sem);
- bdput(bd);
- return rc;
-}
-
-/*
- * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
- * state. The VBDs need to be updated in this way when the domain is
- * initialised and also each time we receive an XLBLK_UPDATE event.
- */
-void xlvbd_update_vbds(void)
-{
- int i, j, k, old_nr, new_nr;
- vdisk_t *old_info, *new_info, *merged_info;
-
- old_info = vbd_info;
- old_nr = nr_vbds;
-
- new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
- if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
- {
- kfree(new_info);
- return;
- }
-
- /*
- * Final list maximum size is old list + new list. This occurs only when
- * old list and new list do not overlap at all, and we cannot yet destroy
- * VBDs in the old list because the usage counts are busy.
- */
- merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
-
- /* @i tracks old list; @j tracks new list; @k tracks merged list. */
- i = j = k = 0;
-
- while ( (i < old_nr) && (j < new_nr) )
- {
- if ( old_info[i].device < new_info[j].device )
- {
- if ( xlvbd_remove_device(old_info[i].device) != 0 )
- memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
- i++;
- }
- else if ( old_info[i].device > new_info[j].device )
- {
- if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
- j++;
- }
- else
- {
- if ( ((old_info[i].capacity == new_info[j].capacity) &&
- (old_info[i].info == new_info[j].info)) ||
- (xlvbd_remove_device(old_info[i].device) != 0) )
- memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
- else if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
- i++; j++;
- }
- }
-
- for ( ; i < old_nr; i++ )
- {
- if ( xlvbd_remove_device(old_info[i].device) != 0 )
- memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
- }
-
- for ( ; j < new_nr; j++ )
- {
- if ( xlvbd_init_device(&new_info[j]) == 0 )
- memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
- }
-
- vbd_info = merged_info;
- nr_vbds = k;
-
- kfree(old_info);
- kfree(new_info);
-}
-#endif
-
-/*
- * Set up all the linux device goop for the virtual block devices
- * (vbd's) that we know about. Note that although from the backend
- * driver's p.o.v. VBDs are addressed simply an opaque 16-bit device
- * number, the domain creation tools conventionally allocate these
- * numbers to correspond to those used by 'real' linux -- this is just
- * for convenience as it means e.g. that the same /etc/fstab can be
- * used when booting with or without Xen.
- */
-int xlvbd_init(void)
-{
- int i;
-
- /*
- * If compiled as a module, we don't support unloading yet. We
- * therefore permanently increment the reference count to
- * disallow it.
- */
- MOD_INC_USE_COUNT;
-
- memset(major_info, 0, sizeof(major_info));
-
- for (i = 0; i < sizeof(major_info) / sizeof(major_info[0]); i++) {
- }
-
- vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
- nr_vbds = xlvbd_get_vbd_info(vbd_info);
-
- if (nr_vbds < 0) {
- kfree(vbd_info);
- vbd_info = NULL;
- nr_vbds = 0;
- } else {
- for (i = 0; i < nr_vbds; i++)
- xlvbd_init_device(&vbd_info[i]);
- }
-
- return 0;
-}